From 5a8826463c19b0d1a2fc60b2adac0ec318047844 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Wed, 23 Mar 2011 08:35:07 +0100 Subject: ALSA: HDA: Realtek: Avoid unnecessary volume control index on Surround/Side From: David Henningsson commit 5a8826463c19b0d1a2fc60b2adac0ec318047844 upstream. Similar to commit 7e59e097c09b82760bb0fe08b0fa2b704d76c3f4, this patch avoids unnecessary volume control indices for more Realtek auto-parsers, e g the ALC66x family, on the "Surround" and "Side" controls. These indices cause these volume controls to be ignored by PulseAudio and vmaster and should be removed whenever possible. Reported-by: Jan Losinski Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -16085,9 +16085,12 @@ static int alc861_auto_create_multi_out_ return err; } else { const char *name = pfx; - if (!name) + int index = i; + if (!name) { name = chname[i]; - err = __alc861_create_out_sw(codec, name, nid, i, 3); + index = 0; + } + err = __alc861_create_out_sw(codec, name, nid, index, 3); if (err < 0) return err; } @@ -17238,16 +17241,19 @@ static int alc861vd_auto_create_multi_ou return err; } else { const char *name = pfx; - if (!name) + int index = i; + if (!name) { name = chname[i]; + index = 0; + } err = __add_pb_vol_ctrl(spec, ALC_CTL_WIDGET_VOL, - name, i, + name, index, HDA_COMPOSE_AMP_VAL(nid_v, 3, 0, HDA_OUTPUT)); if (err < 0) return err; err = __add_pb_sw_ctrl(spec, ALC_CTL_BIND_MUTE, - name, i, + name, index, HDA_COMPOSE_AMP_VAL(nid_s, 3, 2, HDA_INPUT)); if (err < 0) @@ -19296,12 +19302,15 @@ static int alc662_auto_create_multi_out_ return err; } else { const char *name = pfx; - if (!name) + int index = i; + if (!name) { name = chname[i]; - err = __alc662_add_vol_ctl(spec, name, nid, i, 3); + index = 0; + } + err = __alc662_add_vol_ctl(spec, name, nid, index, 3); if (err < 0) return err; - err = __alc662_add_sw_ctl(spec, name, mix, i, 3); + err = __alc662_add_sw_ctl(spec, name, mix, index, 3); if (err < 0) return err; } From bff5fbf50bd498c217994bd2d41a53ac3141185a Mon Sep 17 00:00:00 2001 From: Lydia Wang Date: Tue, 22 Mar 2011 16:21:38 +0800 Subject: ALSA: hda - VIA: Fix stereo mixer recording no sound issue From: Lydia Wang commit bff5fbf50bd498c217994bd2d41a53ac3141185a upstream. Modify function via_mux_enum_put() to fix stereo mixer recording no sound issue. Signed-off-by: Lydia Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1101,6 +1101,7 @@ static int via_mux_enum_put(struct snd_k struct hda_codec *codec = snd_kcontrol_chip(kcontrol); struct via_spec *spec = codec->spec; unsigned int adc_idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id); + int ret; if (!spec->mux_nids[adc_idx]) return -EINVAL; @@ -1109,12 +1110,14 @@ static int via_mux_enum_put(struct snd_k AC_VERB_GET_POWER_STATE, 0x00) != AC_PWRST_D0) snd_hda_codec_write(codec, spec->mux_nids[adc_idx], 0, AC_VERB_SET_POWER_STATE, AC_PWRST_D0); - /* update jack power state */ - set_jack_power_state(codec); - return snd_hda_input_mux_put(codec, spec->input_mux, ucontrol, + ret = snd_hda_input_mux_put(codec, spec->input_mux, ucontrol, spec->mux_nids[adc_idx], &spec->cur_mux[adc_idx]); + /* update jack power state */ + set_jack_power_state(codec); + + return ret; } static int via_independent_hp_info(struct snd_kcontrol *kcontrol, From ce0e5a9e81fbb153ee15ca60246c6722f07fc546 Mon Sep 17 00:00:00 2001 From: Lydia Wang Date: Tue, 22 Mar 2011 16:22:37 +0800 Subject: ALSA: hda - VIA: Fix independent headphone no sound issue From: Lydia Wang commit ce0e5a9e81fbb153ee15ca60246c6722f07fc546 upstream. Modify via_independent_hp_put() function to support VT1718S and VT1812 codecs, and fix independent headphone no sound issue. Signed-off-by: Lydia Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1191,8 +1191,16 @@ static int via_independent_hp_put(struct /* Get Independent Mode index of headphone pin widget */ spec->hp_independent_mode = spec->hp_independent_mode_index == pinsel ? 1 : 0; - snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_CONNECT_SEL, pinsel); + if (spec->codec_type == VT1718S) + snd_hda_codec_write(codec, nid, 0, + AC_VERB_SET_CONNECT_SEL, pinsel ? 2 : 0); + else + snd_hda_codec_write(codec, nid, 0, + AC_VERB_SET_CONNECT_SEL, pinsel); + if (spec->codec_type == VT1812) + snd_hda_codec_write(codec, 0x35, 0, + AC_VERB_SET_CONNECT_SEL, pinsel); if (spec->multiout.hp_nid && spec->multiout.hp_nid != spec->multiout.dac_nids[HDA_FRONT]) snd_hda_codec_setup_stream(codec, spec->multiout.hp_nid, @@ -1211,6 +1219,8 @@ static int via_independent_hp_put(struct activate_ctl(codec, "Headphone Playback Switch", spec->hp_independent_mode); } + /* update jack power state */ + set_jack_power_state(codec); return 0; } From ab657e0cacc39d88145871c6a3c844597c02d406 Mon Sep 17 00:00:00 2001 From: Lydia Wang Date: Tue, 22 Mar 2011 16:23:23 +0800 Subject: ALSA: hda - VIA: Add missing support for VT1718S in A-A path From: Lydia Wang commit ab657e0cacc39d88145871c6a3c844597c02d406 upstream. Modify mute_aa_path() function to support VT1718S codec. Signed-off-by: Lydia Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 5 +++++ 1 file changed, 5 insertions(+) --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1323,6 +1323,11 @@ static void mute_aa_path(struct hda_code start_idx = 2; end_idx = 4; break; + case VT1718S: + nid_mixer = 0x21; + start_idx = 1; + end_idx = 3; + break; default: return; } From 169222813eec8403c76394fb7b35ecab98e3c607 Mon Sep 17 00:00:00 2001 From: Lydia Wang Date: Tue, 22 Mar 2011 16:24:10 +0800 Subject: ALSA: hda - VIA: Fix invalid A-A path volume adjust issue From: Lydia Wang commit 169222813eec8403c76394fb7b35ecab98e3c607 upstream. Modify vt_auto_create_analog_input_ctls() function to fix invalid a-a path volume adjust issue for VT1708S, VT1702 and VT1716S codecs. Signed-off-by: Lydia Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -2456,7 +2456,14 @@ static int vt_auto_create_analog_input_c else type_idx = 0; label = hda_get_autocfg_input_label(codec, cfg, i); - err = via_new_analog_input(spec, label, type_idx, idx, cap_nid); + if (spec->codec_type == VT1708S || + spec->codec_type == VT1702 || + spec->codec_type == VT1716S) + err = via_new_analog_input(spec, label, type_idx, + idx+1, cap_nid); + else + err = via_new_analog_input(spec, label, type_idx, + idx, cap_nid); if (err < 0) return err; snd_hda_add_imux_item(imux, label, idx, NULL); From 0341ccd7557fecafe6a79c55158670cf629d269e Mon Sep 17 00:00:00 2001 From: Lydia Wang Date: Tue, 22 Mar 2011 16:25:03 +0800 Subject: ALSA: hda - VIA: Fix codec type for VT1708BCE at the right timing From: Lydia Wang commit 0341ccd7557fecafe6a79c55158670cf629d269e upstream. Add get_codec_type() in via_new_spec() function to make sure getting correct codec type before building mixer controls. Signed-off-by: Lydia Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -159,6 +159,7 @@ struct via_spec { #endif }; +static enum VIA_HDA_CODEC get_codec_type(struct hda_codec *codec); static struct via_spec * via_new_spec(struct hda_codec *codec) { struct via_spec *spec; @@ -169,6 +170,10 @@ static struct via_spec * via_new_spec(st codec->spec = spec; spec->codec = codec; + spec->codec_type = get_codec_type(codec); + /* VT1708BCE & VT1708S are almost same */ + if (spec->codec_type == VT1708BCE) + spec->codec_type = VT1708S; return spec; } @@ -2203,10 +2208,6 @@ static int via_init(struct hda_codec *co for (i = 0; i < spec->num_iverbs; i++) snd_hda_sequence_write(codec, spec->init_verbs[i]); - spec->codec_type = get_codec_type(codec); - if (spec->codec_type == VT1708BCE) - spec->codec_type = VT1708S; /* VT1708BCE & VT1708S are almost - same */ /* Lydia Add for EAPD enable */ if (!spec->dig_in_nid) { /* No Digital In connection */ if (spec->dig_in_pin) { From ee3c35c0827de02de414d08b2ddcbb910c2263ab Mon Sep 17 00:00:00 2001 From: Lydia Wang Date: Tue, 22 Mar 2011 16:26:36 +0800 Subject: ALSA: hda - VIA: Fix VT1708 can't build up Headphone control issue From: Lydia Wang commit ee3c35c0827de02de414d08b2ddcbb910c2263ab upstream. Since VT1708 didn't support the control of getting connection number, building of headphone control will fail in via_hp_build() function. Signed-off-by: Lydia Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1266,9 +1266,12 @@ static int via_hp_build(struct hda_codec break; } - nums = snd_hda_get_connections(codec, nid, conn, HDA_MAX_CONNECTIONS); - if (nums <= 1) - return 0; + if (spec->codec_type != VT1708) { + nums = snd_hda_get_connections(codec, nid, + conn, HDA_MAX_CONNECTIONS); + if (nums <= 1) + return 0; + } knew = via_clone_control(spec, &via_hp_mixer[0]); if (knew == NULL) From a110f4ef810ee29d810876df725f41d66629733e Mon Sep 17 00:00:00 2001 From: Marek Belisko Date: Wed, 9 Mar 2011 21:46:20 +0100 Subject: ASoC: mini2440: Fix uda134x codec problem. From: Marek Belisko commit a110f4ef810ee29d810876df725f41d66629733e upstream. ASoC audio for mini2440 platform in current kenrel doesn't work. First problem is samsung_asoc_dma device is missing in initialization. Next problem is with codec. Codec is initialized but never probed because no platform_device exist for codec driver. It leads to errors during codec binding to asoc dai. Next problem was platform data which was passed from board to asoc main driver but not passed to codec when called codec_soc_probe(). Following patch should fix issues. But not sure if in correct way. Please review. Signed-off-by: Marek Belisko Acked-by: Liam Girdwood Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-s3c2440/mach-mini2440.c | 7 +++++++ sound/soc/codecs/uda134x.c | 3 ++- sound/soc/samsung/s3c24xx_uda134x.c | 3 ++- 3 files changed, 11 insertions(+), 2 deletions(-) --- a/arch/arm/mach-s3c2440/mach-mini2440.c +++ b/arch/arm/mach-s3c2440/mach-mini2440.c @@ -506,6 +506,11 @@ static struct i2c_board_info mini2440_i2 }, }; +static struct platform_device uda1340_codec = { + .name = "uda134x-codec", + .id = -1, +}; + static struct platform_device *mini2440_devices[] __initdata = { &s3c_device_ohci, &s3c_device_wdt, @@ -521,7 +526,9 @@ static struct platform_device *mini2440_ &s3c_device_nand, &s3c_device_sdi, &s3c_device_iis, + &uda1340_codec, &mini2440_audio, + &samsung_asoc_dma, }; static void __init mini2440_map_io(void) --- a/sound/soc/codecs/uda134x.c +++ b/sound/soc/codecs/uda134x.c @@ -486,7 +486,8 @@ static struct snd_soc_dai_driver uda134x static int uda134x_soc_probe(struct snd_soc_codec *codec) { struct uda134x_priv *uda134x; - struct uda134x_platform_data *pd = dev_get_drvdata(codec->card->dev); + struct uda134x_platform_data *pd = codec->card->dev->platform_data; + int ret; printk(KERN_INFO "UDA134X SoC Audio Codec\n"); --- a/sound/soc/samsung/s3c24xx_uda134x.c +++ b/sound/soc/samsung/s3c24xx_uda134x.c @@ -226,7 +226,7 @@ static struct snd_soc_ops s3c24xx_uda134 static struct snd_soc_dai_link s3c24xx_uda134x_dai_link = { .name = "UDA134X", .stream_name = "UDA134X", - .codec_name = "uda134x-hifi", + .codec_name = "uda134x-codec", .codec_dai_name = "uda134x-hifi", .cpu_dai_name = "s3c24xx-iis", .ops = &s3c24xx_uda134x_ops, @@ -321,6 +321,7 @@ static int s3c24xx_uda134x_probe(struct platform_set_drvdata(s3c24xx_uda134x_snd_device, &snd_soc_s3c24xx_uda134x); + platform_device_add_data(s3c24xx_uda134x_snd_device, &s3c24xx_uda134x, sizeof(s3c24xx_uda134x)); ret = platform_device_add(s3c24xx_uda134x_snd_device); if (ret) { printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: Unable to add\n"); From 3a7da39d165e0c363c294feec119db1427032afd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 17 Mar 2011 07:34:32 +0000 Subject: ethtool: Compat handling for struct ethtool_rxnfc From: Ben Hutchings commit 3a7da39d165e0c363c294feec119db1427032afd upstream. This structure was accidentally defined such that its layout can differ between 32-bit and 64-bit processes. Add compat structure definitions and an ioctl wrapper function. Signed-off-by: Ben Hutchings Acked-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ethtool.h | 34 ++++++++++++++ net/socket.c | 114 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 141 insertions(+), 7 deletions(-) --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -13,6 +13,9 @@ #ifndef _LINUX_ETHTOOL_H #define _LINUX_ETHTOOL_H +#ifdef __KERNEL__ +#include +#endif #include #include @@ -449,6 +452,37 @@ struct ethtool_rxnfc { __u32 rule_locs[0]; }; +#ifdef __KERNEL__ +#ifdef CONFIG_COMPAT + +struct compat_ethtool_rx_flow_spec { + u32 flow_type; + union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct ethhdr ether_spec; + u8 hdata[72]; + } h_u, m_u; + compat_u64 ring_cookie; + u32 location; +}; + +struct compat_ethtool_rxnfc { + u32 cmd; + u32 flow_type; + compat_u64 data; + struct compat_ethtool_rx_flow_spec fs; + u32 rule_cnt; + u32 rule_locs[0]; +}; + +#endif /* CONFIG_COMPAT */ +#endif /* __KERNEL__ */ + /** * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR --- a/net/socket.c +++ b/net/socket.c @@ -2583,23 +2583,123 @@ static int dev_ifconf(struct net *net, s static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { + struct compat_ethtool_rxnfc __user *compat_rxnfc; + bool convert_in = false, convert_out = false; + size_t buf_size = ALIGN(sizeof(struct ifreq), 8); + struct ethtool_rxnfc __user *rxnfc; struct ifreq __user *ifr; + u32 rule_cnt = 0, actual_rule_cnt; + u32 ethcmd; u32 data; - void __user *datap; + int ret; - ifr = compat_alloc_user_space(sizeof(*ifr)); + if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + return -EFAULT; - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + compat_rxnfc = compat_ptr(data); + + if (get_user(ethcmd, &compat_rxnfc->cmd)) return -EFAULT; - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + /* Most ethtool structures are defined without padding. + * Unfortunately struct ethtool_rxnfc is an exception. + */ + switch (ethcmd) { + default: + break; + case ETHTOOL_GRXCLSRLALL: + /* Buffer size is variable */ + if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) + return -EFAULT; + if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) + return -ENOMEM; + buf_size += rule_cnt * sizeof(u32); + /* fall through */ + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + convert_out = true; + /* fall through */ + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + buf_size += sizeof(struct ethtool_rxnfc); + convert_in = true; + break; + } + + ifr = compat_alloc_user_space(buf_size); + rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + + if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &ifr->ifr_ifru.ifru_data)) + if (put_user(convert_in ? rxnfc : compat_ptr(data), + &ifr->ifr_ifru.ifru_data)) return -EFAULT; - return dev_ioctl(net, SIOCETHTOOL, ifr); + if (convert_in) { + /* We expect there to be holes between fs.m_u and + * fs.ring_cookie and at the end of fs, but nowhere else. + */ + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + + sizeof(compat_rxnfc->fs.m_u) != + offsetof(struct ethtool_rxnfc, fs.m_u) + + sizeof(rxnfc->fs.m_u)); + BUILD_BUG_ON( + offsetof(struct compat_ethtool_rxnfc, fs.location) - + offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != + offsetof(struct ethtool_rxnfc, fs.location) - + offsetof(struct ethtool_rxnfc, fs.ring_cookie)); + + if (copy_in_user(rxnfc, compat_rxnfc, + (void *)(&rxnfc->fs.m_u + 1) - + (void *)rxnfc) || + copy_in_user(&rxnfc->fs.ring_cookie, + &compat_rxnfc->fs.ring_cookie, + (void *)(&rxnfc->fs.location + 1) - + (void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + } + + ret = dev_ioctl(net, SIOCETHTOOL, ifr); + if (ret) + return ret; + + if (convert_out) { + if (copy_in_user(compat_rxnfc, rxnfc, + (const void *)(&rxnfc->fs.m_u + 1) - + (const void *)rxnfc) || + copy_in_user(&compat_rxnfc->fs.ring_cookie, + &rxnfc->fs.ring_cookie, + (const void *)(&rxnfc->fs.location + 1) - + (const void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + /* As an optimisation, we only copy the actual + * number of rules that the underlying + * function returned. Since Mallory might + * change the rule count in user memory, we + * check that it is less than the rule count + * originally given (as the user buffer size), + * which has been range-checked. + */ + if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + if (actual_rule_cnt < rule_cnt) + rule_cnt = actual_rule_cnt; + if (copy_in_user(&compat_rxnfc->rule_locs[0], + &rxnfc->rule_locs[0], + rule_cnt * sizeof(u32))) + return -EFAULT; + } + } + + return 0; } static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) From 3ff84a7f36554b257cd57325b1a7c1fa4b49fbe3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 14 Feb 2011 17:46:21 +0200 Subject: Revert "slab: Fix missing DEBUG_SLAB last user" From: Pekka Enberg commit 3ff84a7f36554b257cd57325b1a7c1fa4b49fbe3 upstream. This reverts commit 5c5e3b33b7cb959a401f823707bee006caadd76e. The commit breaks ARM thusly: | Mount-cache hash table entries: 512 | slab error in verify_redzone_free(): cache `idr_layer_cache': memory outside object was overwritten | Backtrace: | [] (dump_backtrace+0x0/0x110) from [] (dump_stack+0x18/0x1c) | [] (dump_stack+0x0/0x1c) from [] (__slab_error+0x28/0x30) | [] (__slab_error+0x0/0x30) from [] (cache_free_debugcheck+0x1c0/0x2b8) | [] (cache_free_debugcheck+0x0/0x2b8) from [] (kmem_cache_free+0x3c/0xc0) | [] (kmem_cache_free+0x0/0xc0) from [] (ida_get_new_above+0x19c/0x1c0) | [] (ida_get_new_above+0x0/0x1c0) from [] (alloc_vfsmnt+0x54/0x144) | [] (alloc_vfsmnt+0x0/0x144) from [] (vfs_kern_mount+0x30/0xec) | [] (vfs_kern_mount+0x0/0xec) from [] (kern_mount_data+0x1c/0x20) | [] (kern_mount_data+0x0/0x20) from [] (sysfs_init+0x68/0xc8) | [] (sysfs_init+0x0/0xc8) from [] (mnt_init+0x90/0x1b0) | [] (mnt_init+0x0/0x1b0) from [] (vfs_caches_init+0x100/0x140) | [] (vfs_caches_init+0x0/0x140) from [] (start_kernel+0x2e8/0x368) | [] (start_kernel+0x0/0x368) from [] (__enable_mmu+0x0/0x2c) | c0113268: redzone 1:0xd84156c5c032b3ac, redzone 2:0xd84156c5635688c0. | slab error in cache_alloc_debugcheck_after(): cache `idr_layer_cache': double free, or memory outside object was overwritten | ... | c011307c: redzone 1:0x9f91102ffffffff, redzone 2:0x9f911029d74e35b | slab: Internal list corruption detected in cache 'idr_layer_cache'(24), slabp c0113000(16). Hexdump: | | 000: 20 4f 10 c0 20 4f 10 c0 7c 00 00 00 7c 30 11 c0 | 010: 10 00 00 00 10 00 00 00 00 00 c9 17 fe ff ff ff | 020: fe ff ff ff fe ff ff ff fe ff ff ff fe ff ff ff | 030: fe ff ff ff fe ff ff ff fe ff ff ff fe ff ff ff | 040: fe ff ff ff fe ff ff ff fe ff ff ff fe ff ff ff | 050: fe ff ff ff fe ff ff ff fe ff ff ff 11 00 00 00 | 060: 12 00 00 00 13 00 00 00 14 00 00 00 15 00 00 00 | 070: 16 00 00 00 17 00 00 00 c0 88 56 63 | kernel BUG at /home/rmk/git/linux-2.6-rmk/mm/slab.c:2928! Reference: https://lkml.org/lkml/2011/2/7/238 Reported-and-analyzed-by: Russell King Signed-off-by: Pekka Enberg Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) --- a/mm/slab.c +++ b/mm/slab.c @@ -2288,8 +2288,8 @@ kmem_cache_create (const char *name, siz if (ralign < align) { ralign = align; } - /* disable debug if not aligning with REDZONE_ALIGN */ - if (ralign & (__alignof__(unsigned long long) - 1)) + /* disable debug if necessary */ + if (ralign > __alignof__(unsigned long long)) flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); /* * 4) Store it. @@ -2315,8 +2315,8 @@ kmem_cache_create (const char *name, siz */ if (flags & SLAB_RED_ZONE) { /* add space for red zone words */ - cachep->obj_offset += align; - size += align + sizeof(unsigned long long); + cachep->obj_offset += sizeof(unsigned long long); + size += 2 * sizeof(unsigned long long); } if (flags & SLAB_STORE_USER) { /* user store requires one word storage behind the end of From e91f90bb0bb10be9cc8efd09a3cf4ecffcad0db1 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 22 Mar 2011 16:35:10 -0700 Subject: aio: wake all waiters when destroying ctx From: Roland Dreier commit e91f90bb0bb10be9cc8efd09a3cf4ecffcad0db1 upstream. The test program below will hang because io_getevents() uses add_wait_queue_exclusive(), which means the wake_up() in io_destroy() only wakes up one of the threads. Fix this by using wake_up_all() in the aio code paths where we want to make sure no one gets stuck. // t.c -- compile with gcc -lpthread -laio t.c #include #include #include #include static const int nthr = 2; void *getev(void *ctx) { struct io_event ev; io_getevents(ctx, 1, 1, &ev, NULL); printf("io_getevents returned\n"); return NULL; } int main(int argc, char *argv[]) { io_context_t ctx = 0; pthread_t thread[nthr]; int i; io_setup(1024, &ctx); for (i = 0; i < nthr; ++i) pthread_create(&thread[i], NULL, getev, ctx); sleep(1); io_destroy(ctx); for (i = 0; i < nthr; ++i) pthread_join(thread[i], NULL); return 0; } Signed-off-by: Roland Dreier Reviewed-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/fs/aio.c +++ b/fs/aio.c @@ -520,7 +520,7 @@ static inline void really_put_req(struct ctx->reqs_active--; if (unlikely(!ctx->reqs_active && ctx->dead)) - wake_up(&ctx->wait); + wake_up_all(&ctx->wait); } static void aio_fput_routine(struct work_struct *data) @@ -1229,7 +1229,7 @@ static void io_destroy(struct kioctx *io * by other CPUs at this point. Right now, we rely on the * locking done by the above calls to ensure this consistency. */ - wake_up(&ioctx->wait); + wake_up_all(&ioctx->wait); put_ioctx(ioctx); /* once for the lookup */ } From 8d2587970b8bdf7c8d9208e3f4bb93182aef1a0f Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Tue, 22 Mar 2011 16:30:13 -0700 Subject: cgroups: if you list_empty() a head then don't list_del() it From: Phil Carmody commit 8d2587970b8bdf7c8d9208e3f4bb93182aef1a0f upstream. list_del() leaves poison in the prev and next pointers. The next list_empty() will compare those poisons, and say the list isn't empty. Any list operations that assume the node is on a list because of such a check will be fooled into dereferencing poison. One needs to INIT the node after the del, and fortunately there's already a wrapper for that - list_del_init(). Some of the dels are followed by deallocations, so can be ignored, and one can be merged with an add to make a move. Apart from that, I erred on the side of caution in making nodes list_empty()-queriable. Signed-off-by: Phil Carmody Reviewed-by: Paul Menage Cc: Li Zefan Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1813,10 +1813,8 @@ int cgroup_attach_task(struct cgroup *cg /* Update the css_set linked lists if we're using them */ write_lock(&css_set_lock); - if (!list_empty(&tsk->cg_list)) { - list_del(&tsk->cg_list); - list_add(&tsk->cg_list, &newcg->tasks); - } + if (!list_empty(&tsk->cg_list)) + list_move(&tsk->cg_list, &newcg->tasks); write_unlock(&css_set_lock); for_each_subsys(root, ss) { @@ -3655,12 +3653,12 @@ again: spin_lock(&release_list_lock); set_bit(CGRP_REMOVED, &cgrp->flags); if (!list_empty(&cgrp->release_list)) - list_del(&cgrp->release_list); + list_del_init(&cgrp->release_list); spin_unlock(&release_list_lock); cgroup_lock_hierarchy(cgrp->root); /* delete this cgroup from parent->children */ - list_del(&cgrp->sibling); + list_del_init(&cgrp->sibling); cgroup_unlock_hierarchy(cgrp->root); d = dget(cgrp->dentry); @@ -3879,7 +3877,7 @@ void cgroup_unload_subsys(struct cgroup_ subsys[ss->subsys_id] = NULL; /* remove subsystem from rootnode's list of subsystems */ - list_del(&ss->sibling); + list_del_init(&ss->sibling); /* * disentangle the css from all css_sets attached to the dummytop. as @@ -4253,7 +4251,7 @@ void cgroup_exit(struct task_struct *tsk if (!list_empty(&tsk->cg_list)) { write_lock(&css_set_lock); if (!list_empty(&tsk->cg_list)) - list_del(&tsk->cg_list); + list_del_init(&tsk->cg_list); write_unlock(&css_set_lock); } From bee4c36a5cf5c9f63ce1d7372aa62045fbd16d47 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 22 Mar 2011 16:33:43 -0700 Subject: shmem: let shared anonymous be nonlinear again From: Hugh Dickins commit bee4c36a5cf5c9f63ce1d7372aa62045fbd16d47 upstream. Up to 2.6.22, you could use remap_file_pages(2) on a tmpfs file or a shared mapping of /dev/zero or a shared anonymous mapping. In 2.6.23 we disabled it by default, but set VM_CAN_NONLINEAR to enable it on safe mappings. We made sure to set it in shmem_mmap() for tmpfs files, but missed it in shmem_zero_setup() for the others. Fix that at last. Reported-by: Kenny Simpson Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 1 + 1 file changed, 1 insertion(+) --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2791,5 +2791,6 @@ int shmem_zero_setup(struct vm_area_stru fput(vma->vm_file); vma->vm_file = file; vma->vm_ops = &shmem_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } From 52c50567d8ab0a0a87f12cceaa4194967854f0bd Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 22 Mar 2011 16:30:08 -0700 Subject: mm: swap: unlock swapfile inode mutex before closing file on bad swapfiles From: Mel Gorman commit 52c50567d8ab0a0a87f12cceaa4194967854f0bd upstream. If an administrator tries to swapon a file backed by NFS, the inode mutex is taken (as it is for any swapfile) but later identified to be a bad swapfile due to the lack of bmap and tries to cleanup. During cleanup, an attempt is made to close the file but with inode->i_mutex still held. Closing an NFS file syncs it which tries to acquire the inode mutex leading to deadlock. If lockdep is enabled the following appears on the console; ============================================= [ INFO: possible recursive locking detected ] 2.6.38-rc8-autobuild #1 --------------------------------------------- swapon/2192 is trying to acquire lock: (&sb->s_type->i_mutex_key#13){+.+.+.}, at: vfs_fsync_range+0x47/0x7c but task is already holding lock: (&sb->s_type->i_mutex_key#13){+.+.+.}, at: sys_swapon+0x28d/0xae7 other info that might help us debug this: 1 lock held by swapon/2192: #0: (&sb->s_type->i_mutex_key#13){+.+.+.}, at: sys_swapon+0x28d/0xae7 stack backtrace: Pid: 2192, comm: swapon Not tainted 2.6.38-rc8-autobuild #1 Call Trace: __lock_acquire+0x2eb/0x1623 find_get_pages_tag+0x14a/0x174 pagevec_lookup_tag+0x25/0x2e vfs_fsync_range+0x47/0x7c lock_acquire+0xd3/0x100 vfs_fsync_range+0x47/0x7c nfs_flush_one+0x0/0xdf [nfs] mutex_lock_nested+0x40/0x2b1 vfs_fsync_range+0x47/0x7c vfs_fsync_range+0x47/0x7c vfs_fsync+0x1c/0x1e nfs_file_flush+0x64/0x69 [nfs] filp_close+0x43/0x72 sys_swapon+0xa39/0xae7 sysret_check+0x2e/0x69 system_call_fastpath+0x16/0x1b This patch releases the mutex if its held before calling filep_close() so swapon fails as expected without deadlock when the swapfile is backed by NFS. If accepted for 2.6.39, it should also be considered a -stable candidate for 2.6.38 and 2.6.37. Signed-off-by: Mel Gorman Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/swapfile.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2149,8 +2149,13 @@ bad_swap_2: p->flags = 0; spin_unlock(&swap_lock); vfree(swap_map); - if (swap_file) + if (swap_file) { + if (did_down) { + mutex_unlock(&inode->i_mutex); + did_down = 0; + } filp_close(swap_file, NULL); + } out: if (page && !IS_ERR(page)) { kunmap(page); From d527caf22e48480b102c7c6ee5b9ba12170148f7 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 22 Mar 2011 16:30:38 -0700 Subject: mm: compaction: prevent kswapd compacting memory to reduce CPU usage From: Andrea Arcangeli commit d527caf22e48480b102c7c6ee5b9ba12170148f7 upstream. This patch reverts 5a03b051 ("thp: use compaction in kswapd for GFP_ATOMIC order > 0") due to reports stating that kswapd CPU usage was higher and IRQs were being disabled more frequently. This was reported at http://www.spinics.net/linux/fedora/alsa-user/msg09885.html. Without this patch applied, CPU usage by kswapd hovers around the 20% mark according to the tester (Arthur Marsh: http://www.spinics.net/linux/fedora/alsa-user/msg09899.html). With this patch applied, it's around 2%. The problem is not related to THP which specifies __GFP_NO_KSWAPD but is triggered by high-order allocations hitting the low watermark for their order and waking kswapd on kernels with CONFIG_COMPACTION set. The most common trigger for this is network cards configured for jumbo frames but it's also possible it'll be triggered by fork-heavy workloads (order-1) and some wireless cards which depend on order-1 allocations. The symptoms for the user will be high CPU usage by kswapd in low-memory situations which could be confused with another writeback problem. While a patch like 5a03b051 may be reintroduced in the future, this patch plays it safe for now and reverts it. [mel@csn.ul.ie: Beefed up the changelog] Signed-off-by: Andrea Arcangeli Signed-off-by: Mel Gorman Reported-by: Arthur Marsh Tested-by: Arthur Marsh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/compaction.h | 9 ++------- mm/compaction.c | 24 +++--------------------- mm/vmscan.c | 18 +----------------- 3 files changed, 6 insertions(+), 45 deletions(-) --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -11,9 +11,6 @@ /* The full zone was compacted */ #define COMPACT_COMPLETE 3 -#define COMPACT_MODE_DIRECT_RECLAIM 0 -#define COMPACT_MODE_KSWAPD 1 - #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, @@ -28,8 +25,7 @@ extern unsigned long try_to_compact_page bool sync); extern unsigned long compaction_suitable(struct zone *zone, int order); extern unsigned long compact_zone_order(struct zone *zone, int order, - gfp_t gfp_mask, bool sync, - int compact_mode); + gfp_t gfp_mask, bool sync); /* Do not skip compaction more than 64 times */ #define COMPACT_MAX_DEFER_SHIFT 6 @@ -74,8 +70,7 @@ static inline unsigned long compaction_s } static inline unsigned long compact_zone_order(struct zone *zone, int order, - gfp_t gfp_mask, bool sync, - int compact_mode) + gfp_t gfp_mask, bool sync) { return COMPACT_CONTINUE; } --- a/mm/compaction.c +++ b/mm/compaction.c @@ -42,8 +42,6 @@ struct compact_control { unsigned int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; - - int compact_mode; }; static unsigned long release_freepages(struct list_head *freelist) @@ -397,10 +395,7 @@ static int compact_finished(struct zone return COMPACT_COMPLETE; /* Compaction run is not finished if the watermark is not met */ - if (cc->compact_mode != COMPACT_MODE_KSWAPD) - watermark = low_wmark_pages(zone); - else - watermark = high_wmark_pages(zone); + watermark = low_wmark_pages(zone); watermark += (1 << cc->order); if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) @@ -413,15 +408,6 @@ static int compact_finished(struct zone if (cc->order == -1) return COMPACT_CONTINUE; - /* - * Generating only one page of the right order is not enough - * for kswapd, we must continue until we're above the high - * watermark as a pool for high order GFP_ATOMIC allocations - * too. - */ - if (cc->compact_mode == COMPACT_MODE_KSWAPD) - return COMPACT_CONTINUE; - /* Direct compactor: Is a suitable page free? */ for (order = cc->order; order < MAX_ORDER; order++) { /* Job done if page is free of the right migratetype */ @@ -543,8 +529,7 @@ static int compact_zone(struct zone *zon unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync, - int compact_mode) + bool sync) { struct compact_control cc = { .nr_freepages = 0, @@ -553,7 +538,6 @@ unsigned long compact_zone_order(struct .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, .sync = sync, - .compact_mode = compact_mode, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -599,8 +583,7 @@ unsigned long try_to_compact_pages(struc nodemask) { int status; - status = compact_zone_order(zone, order, gfp_mask, sync, - COMPACT_MODE_DIRECT_RECLAIM); + status = compact_zone_order(zone, order, gfp_mask, sync); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ @@ -631,7 +614,6 @@ static int compact_node(int nid) .nr_freepages = 0, .nr_migratepages = 0, .order = -1, - .compact_mode = COMPACT_MODE_DIRECT_RECLAIM, }; zone = &pgdat->node_zones[zoneid]; --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2397,7 +2397,6 @@ loop_again: * cause too much scanning of the lower zones. */ for (i = 0; i <= end_zone; i++) { - int compaction; struct zone *zone = pgdat->node_zones + i; int nr_slab; @@ -2428,24 +2427,9 @@ loop_again: sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; - compaction = 0; - if (order && - zone_watermark_ok(zone, 0, - high_wmark_pages(zone), - end_zone, 0) && - !zone_watermark_ok(zone, order, - high_wmark_pages(zone), - end_zone, 0)) { - compact_zone_order(zone, - order, - sc.gfp_mask, false, - COMPACT_MODE_KSWAPD); - compaction = 1; - } - if (zone->all_unreclaimable) continue; - if (!compaction && nr_slab == 0 && + if (nr_slab == 0 && !zone_reclaimable(zone)) zone->all_unreclaimable = 1; /* From 3a5dda7a17cf3706f79b86293f29db02d61e0d48 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 22 Mar 2011 16:30:09 -0700 Subject: oom: prevent unnecessary oom kills or kernel panics From: David Rientjes commit 3a5dda7a17cf3706f79b86293f29db02d61e0d48 upstream. This patch prevents unnecessary oom kills or kernel panics by reverting two commits: 495789a5 (oom: make oom_score to per-process value) cef1d352 (oom: multi threaded process coredump don't make deadlock) First, 495789a5 (oom: make oom_score to per-process value) ignores the fact that all threads in a thread group do not necessarily exit at the same time. It is imperative that select_bad_process() detect threads that are in the exit path, specifically those with PF_EXITING set, to prevent needlessly killing additional tasks. If a process is oom killed and the thread group leader exits, select_bad_process() cannot detect the other threads that are PF_EXITING by iterating over only processes. Thus, it currently chooses another task unnecessarily for oom kill or panics the machine when nothing else is eligible. By iterating over threads instead, it is possible to detect threads that are exiting and nominate them for oom kill so they get access to memory reserves. Second, cef1d352 (oom: multi threaded process coredump don't make deadlock) erroneously avoids making the oom killer a no-op when an eligible thread other than current isfound to be exiting. We want to detect this situation so that we may allow that exiting thread time to exit and free its memory; if it is able to exit on its own, that should free memory so current is no loner oom. If it is not able to exit on its own, the oom killer will nominate it for oom kill which, in this case, only means it will get access to memory reserves. Without this change, it is easy for the oom killer to unnecessarily target tasks when all threads of a victim don't exit before the thread group leader or, in the worst case, panic the machine. Signed-off-by: David Rientjes Cc: KOSAKI Motohiro Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Cc: Hugh Dickins Cc: Andrey Vagin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/oom_kill.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -292,11 +292,11 @@ static struct task_struct *select_bad_pr unsigned long totalpages, struct mem_cgroup *mem, const nodemask_t *nodemask) { - struct task_struct *p; + struct task_struct *g, *p; struct task_struct *chosen = NULL; *ppoints = 0; - for_each_process(p) { + do_each_thread(g, p) { unsigned int points; if (oom_unkillable_task(p, mem, nodemask)) @@ -324,7 +324,7 @@ static struct task_struct *select_bad_pr * the process of exiting and releasing its resources. * Otherwise we could get an easy OOM deadlock. */ - if (thread_group_empty(p) && (p->flags & PF_EXITING) && p->mm) { + if ((p->flags & PF_EXITING) && p->mm) { if (p != current) return ERR_PTR(-1UL); @@ -337,7 +337,7 @@ static struct task_struct *select_bad_pr chosen = p; *ppoints = points; } - } + } while_each_thread(g, p); return chosen; } From 30e2b41f20b6238f51e7cffb879c7a0f0073f5fe Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 22 Mar 2011 16:30:11 -0700 Subject: oom: skip zombies when iterating tasklist From: Andrey Vagin commit 30e2b41f20b6238f51e7cffb879c7a0f0073f5fe upstream. We shouldn't defer oom killing if a thread has already detached its ->mm and still has TIF_MEMDIE set. Memory needs to be freed, so find kill other threads that pin the same ->mm or find another task to kill. Signed-off-by: Andrey Vagin Signed-off-by: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/oom_kill.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -299,6 +299,8 @@ static struct task_struct *select_bad_pr do_each_thread(g, p) { unsigned int points; + if (!p->mm) + continue; if (oom_unkillable_task(p, mem, nodemask)) continue; @@ -324,7 +326,7 @@ static struct task_struct *select_bad_pr * the process of exiting and releasing its resources. * Otherwise we could get an easy OOM deadlock. */ - if ((p->flags & PF_EXITING) && p->mm) { + if (p->flags & PF_EXITING) { if (p != current) return ERR_PTR(-1UL); From edd45544c6f09550df0a5491aa8a07af24767e73 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 22 Mar 2011 16:30:12 -0700 Subject: oom: avoid deferring oom killer if exiting task is being traced From: David Rientjes commit edd45544c6f09550df0a5491aa8a07af24767e73 upstream. The oom killer naturally defers killing anything if it finds an eligible task that is already exiting and has yet to detach its ->mm. This avoids unnecessarily killing tasks when one is already in the exit path and may free enough memory that the oom killer is no longer needed. This is detected by PF_EXITING since threads that have already detached its ->mm are no longer considered at all. The problem with always deferring when a thread is PF_EXITING, however, is that it may never actually exit when being traced, specifically if another task is tracing it with PTRACE_O_TRACEEXIT. The oom killer does not want to defer in this case since there is no guarantee that thread will ever exit without intervention. This patch will now only defer the oom killer when a thread is PF_EXITING and no ptracer has stopped its progress in the exit path. It also ensures that a child is sacrificed for the chosen parent only if it has a different ->mm as the comment implies: this ensures that the thread group leader is always targeted appropriately. Signed-off-by: David Rientjes Reported-by: Oleg Nesterov Cc: KOSAKI Motohiro Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Andrey Vagin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/oom_kill.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -31,6 +31,7 @@ #include #include #include +#include int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; @@ -316,22 +317,29 @@ static struct task_struct *select_bad_pr if (test_tsk_thread_flag(p, TIF_MEMDIE)) return ERR_PTR(-1UL); - /* - * This is in the process of releasing memory so wait for it - * to finish before killing some other task by mistake. - * - * However, if p is the current task, we allow the 'kill' to - * go ahead if it is exiting: this will simply set TIF_MEMDIE, - * which will allow it to gain access to memory reserves in - * the process of exiting and releasing its resources. - * Otherwise we could get an easy OOM deadlock. - */ if (p->flags & PF_EXITING) { - if (p != current) - return ERR_PTR(-1UL); - - chosen = p; - *ppoints = 1000; + /* + * If p is the current task and is in the process of + * releasing memory, we allow the "kill" to set + * TIF_MEMDIE, which will allow it to gain access to + * memory reserves. Otherwise, it may stall forever. + * + * The loop isn't broken here, however, in case other + * threads are found to have already been oom killed. + */ + if (p == current) { + chosen = p; + *ppoints = 1000; + } else { + /* + * If this task is not being ptraced on exit, + * then wait for it to finish before killing + * some other task unnecessarily. + */ + if (!(task_ptrace(p->group_leader) & + PT_TRACE_EXIT)) + return ERR_PTR(-1UL); + } } points = oom_badness(p, mem, nodemask, totalpages); @@ -493,6 +501,8 @@ static int oom_kill_process(struct task_ list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; + if (child->mm == p->mm) + continue; /* * oom_badness() returns 0 if the thread is unkillable */ From 47e9037ac16637cd7f12b8790ea7ce6680e42168 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Mon, 28 Feb 2011 16:20:11 +0000 Subject: PCI hotplug: acpiphp: set current_state to D0 in register_slot From: Stefano Stabellini commit 47e9037ac16637cd7f12b8790ea7ce6680e42168 upstream. If a device doesn't support power management (pm_cap == 0) but it is acpi_pci_power_manageable() because there is a _PS0 method declared for it and _EJ0 is also declared for the slot then nobody is going to set current_state = PCI_D0 for this device. This is what I think it is happening: pci_enable_device | __pci_enable_device_flags /* here we do not set current_state because !pm_cap */ | do_pci_enable_device | pci_set_power_state | __pci_start_power_transition | pci_platform_power_transition /* platform_pci_power_manageable() calls acpi_pci_power_manageable that * returns true */ | platform_pci_set_power_state /* acpi_pci_set_power_state gets called and does nothing because the * acpi device has _EJ0, see the comment "If the ACPI device has _EJ0, * ignore the device" */ at this point if we refer to the commit message that introduced the comment above (10b3dcae0f275e2546e55303d64ddbb58cec7599), it is up to the hotplug driver to set the state to D0. However AFAICT the pci hotplug driver never does, in fact drivers/pci/hotplug/acpiphp_glue.c:register_slot sets the slot flags to (SLOT_ENABLED | SLOT_POWEREDON) but it does not set the pci device current state to PCI_D0. So my proposed fix is also to set current_state = PCI_D0 in register_slot. Comments are very welcome. Signed-off-by: Stefano Stabellini Signed-off-by: Jesse Barnes Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/acpiphp_glue.c | 1 + 1 file changed, 1 insertion(+) --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -212,6 +212,7 @@ register_slot(acpi_handle handle, u32 lv pdev = pci_get_slot(pbus, PCI_DEVFN(device, function)); if (pdev) { + pdev->current_state = PCI_D0; slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON); pci_dev_put(pdev); } From 8c3c283e6bf463ab498d6e7823aff6c4762314b6 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Wed, 16 Mar 2011 22:11:46 -0700 Subject: Input: xen-kbdfront - advertise either absolute or relative coordinates From: Olaf Hering commit 8c3c283e6bf463ab498d6e7823aff6c4762314b6 upstream. A virtualized display device is usually viewed with the vncviewer application, either by 'xm vnc domU' or with vncviewer localhost:port. vncviewer and the RFB protocol provides absolute coordinates to the virtual display. These coordinates are either passed through to a PV guest or converted to relative coordinates for a HVM guest. A PV guest receives these coordinates and passes them to the kernels evdev driver. There it can be picked up by applications such as the xorg-input drivers. Using absolute coordinates avoids issues such as guest mouse pointer not tracking host mouse pointer due to wrong mouse acceleration settings in the guests X display. Advertise either absolute or relative coordinates to the input system and the evdev driver, depending on what dom0 provides. The xorg-input driver prefers relative coordinates even if a devices provides both. Signed-off-by: Olaf Hering Signed-off-by: Stefano Stabellini Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/xen-kbdfront.c | 44 +++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -110,7 +110,7 @@ static irqreturn_t input_handler(int rq, static int __devinit xenkbd_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int ret, i; + int ret, i, abs; struct xenkbd_info *info; struct input_dev *kbd, *ptr; @@ -128,6 +128,11 @@ static int __devinit xenkbd_probe(struct if (!info->page) goto error_nomem; + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-abs-pointer", "%d", &abs) < 0) + abs = 0; + if (abs) + xenbus_printf(XBT_NIL, dev->nodename, "request-abs-pointer", "1"); + /* keyboard */ kbd = input_allocate_device(); if (!kbd) @@ -137,11 +142,12 @@ static int __devinit xenkbd_probe(struct kbd->id.bustype = BUS_PCI; kbd->id.vendor = 0x5853; kbd->id.product = 0xffff; - kbd->evbit[0] = BIT(EV_KEY); + + __set_bit(EV_KEY, kbd->evbit); for (i = KEY_ESC; i < KEY_UNKNOWN; i++) - set_bit(i, kbd->keybit); + __set_bit(i, kbd->keybit); for (i = KEY_OK; i < KEY_MAX; i++) - set_bit(i, kbd->keybit); + __set_bit(i, kbd->keybit); ret = input_register_device(kbd); if (ret) { @@ -160,12 +166,20 @@ static int __devinit xenkbd_probe(struct ptr->id.bustype = BUS_PCI; ptr->id.vendor = 0x5853; ptr->id.product = 0xfffe; - ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); + + if (abs) { + __set_bit(EV_ABS, ptr->evbit); + input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); + input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); + } else { + input_set_capability(ptr, EV_REL, REL_X); + input_set_capability(ptr, EV_REL, REL_Y); + } + input_set_capability(ptr, EV_REL, REL_WHEEL); + + __set_bit(EV_KEY, ptr->evbit); for (i = BTN_LEFT; i <= BTN_TASK; i++) - set_bit(i, ptr->keybit); - ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL); - input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); - input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); + __set_bit(i, ptr->keybit); ret = input_register_device(ptr); if (ret) { @@ -272,7 +286,7 @@ static void xenkbd_backend_changed(struc enum xenbus_state backend_state) { struct xenkbd_info *info = dev_get_drvdata(&dev->dev); - int ret, val; + int val; switch (backend_state) { case XenbusStateInitialising: @@ -285,16 +299,6 @@ static void xenkbd_backend_changed(struc case XenbusStateInitWait: InitWait: - ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend, - "feature-abs-pointer", "%d", &val); - if (ret < 0) - val = 0; - if (val) { - ret = xenbus_printf(XBT_NIL, info->xbdev->nodename, - "request-abs-pointer", "1"); - if (ret) - pr_warning("can't request abs-pointer\n"); - } xenbus_switch_state(dev, XenbusStateConnected); break; From 14988a4d350ce3b41ecad4f63c4f44c56f5ae34d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 18 Feb 2011 11:32:40 +0000 Subject: xen: set max_pfn_mapped to the last pfn mapped From: Stefano Stabellini commit 14988a4d350ce3b41ecad4f63c4f44c56f5ae34d upstream. Do not set max_pfn_mapped to the end of the initial memory mappings, that also contain pages that don't belong in pfn space (like the mfn list). Set max_pfn_mapped to the last real pfn mapped in the initial memory mappings that is the pfn backing _end. Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk LKML-Reference: Signed-off-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/mmu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1651,9 +1651,6 @@ static __init void xen_map_identity_earl for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { pte_t pte; - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - if (!pte_none(pte_page[pteidx])) continue; @@ -1711,6 +1708,12 @@ __init pgd_t *xen_setup_kernel_pagetable pud_t *l3; pmd_t *l2; + /* max_pfn_mapped is the last pfn mapped in the initial memory + * mappings. Considering that on Xen after the kernel mappings we + * have the mappings of some pages that don't exist in pfn space, we + * set max_pfn_mapped to the last real pfn mapped. */ + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); @@ -1815,9 +1818,7 @@ __init pgd_t *xen_setup_kernel_pagetable initial_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + - xen_start_info->nr_pt_frames * PAGE_SIZE + - 512*1024); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); From da48524eb20662618854bb3df2db01fc65f3070c Mon Sep 17 00:00:00 2001 From: Julien Tinnes Date: Fri, 18 Mar 2011 15:05:21 -0700 Subject: Prevent rt_sigqueueinfo and rt_tgsigqueueinfo from spoofing the signal code From: Julien Tinnes commit da48524eb20662618854bb3df2db01fc65f3070c upstream. Userland should be able to trust the pid and uid of the sender of a signal if the si_code is SI_TKILL. Unfortunately, the kernel has historically allowed sigqueueinfo() to send any si_code at all (as long as it was negative - to distinguish it from kernel-generated signals like SIGILL etc), so it could spoof a SI_TKILL with incorrect siginfo values. Happily, it looks like glibc has always set si_code to the appropriate SI_QUEUE, so there are probably no actual user code that ever uses anything but the appropriate SI_QUEUE flag. So just tighten the check for si_code (we used to allow any negative value), and add a (one-time) warning in case there are binaries out there that might depend on using other si_code values. Signed-off-by: Julien Tinnes Acked-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2421,9 +2421,13 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, return -EFAULT; /* Not even root can pretend to send signals from the kernel. - Nor can they impersonate a kill(), which adds source info. */ - if (info.si_code >= 0) + * Nor can they impersonate a kill()/tgkill(), which adds source info. + */ + if (info.si_code != SI_QUEUE) { + /* We used to allow any < 0 si_code */ + WARN_ON_ONCE(info.si_code < 0); return -EPERM; + } info.si_signo = sig; /* POSIX.1b doesn't mention process groups. */ @@ -2437,9 +2441,13 @@ long do_rt_tgsigqueueinfo(pid_t tgid, pi return -EINVAL; /* Not even root can pretend to send signals from the kernel. - Nor can they impersonate a kill(), which adds source info. */ - if (info->si_code >= 0) + * Nor can they impersonate a kill()/tgkill(), which adds source info. + */ + if (info->si_code != SI_QUEUE) { + /* We used to allow any < 0 si_code */ + WARN_ON_ONCE(info->si_code < 0); return -EPERM; + } info->si_signo = sig; return do_send_specific(tgid, pid, sig, info); From c6066ab342e8cb3e4e3ec74f7577f099d79465b3 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Tue, 22 Mar 2011 10:23:28 +0100 Subject: ALSA: HDA: Fix internal mic on Dell E5420/E5520 From: David Henningsson This is a fixup for the 2.6.38 kernel, as the issue is being resolved by upstream commits 699d899560cd7e72da39231e584412e7ac8114a4 and 094a42452abd5564429045e210281c6d22e67fca - which are too invasive to reach 2.6.38. Instead make pin fixes as a workaround. BugLink: http://bugs.launchpad.net/bugs/740055 Tested-by: Kent Baxley Signed-off-by: David Henningsson Acked-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -94,6 +94,7 @@ enum { STAC_92HD83XXX_REF, STAC_92HD83XXX_PWR_REF, STAC_DELL_S14, + STAC_DELL_E5520M, STAC_92HD83XXX_HP, STAC_HP_DV7_4000, STAC_92HD83XXX_MODELS @@ -1657,6 +1658,13 @@ static unsigned int dell_s14_pin_configs 0x40f000f0, 0x40f000f0, }; +/* Switch int mic from 0x20 to 0x11 */ +static unsigned int dell_e5520m_pin_configs[10] = { + 0x04a11020, 0x0421101f, 0x400000f0, 0x90170110, + 0x23011050, 0x23a1102e, 0x400000f3, 0xd5a30130, + 0x400000f0, 0x40f000f0, +}; + static unsigned int hp_dv7_4000_pin_configs[10] = { 0x03a12050, 0x0321201f, 0x40f000f0, 0x90170110, 0x40f000f0, 0x40f000f0, 0x90170110, 0xd5a30140, @@ -1667,6 +1675,7 @@ static unsigned int *stac92hd83xxx_brd_t [STAC_92HD83XXX_REF] = ref92hd83xxx_pin_configs, [STAC_92HD83XXX_PWR_REF] = ref92hd83xxx_pin_configs, [STAC_DELL_S14] = dell_s14_pin_configs, + [STAC_DELL_E5520M] = dell_e5520m_pin_configs, [STAC_HP_DV7_4000] = hp_dv7_4000_pin_configs, }; @@ -1675,6 +1684,7 @@ static const char * const stac92hd83xxx_ [STAC_92HD83XXX_REF] = "ref", [STAC_92HD83XXX_PWR_REF] = "mic-ref", [STAC_DELL_S14] = "dell-s14", + [STAC_DELL_E5520M] = "dell-e5520m", [STAC_92HD83XXX_HP] = "hp", [STAC_HP_DV7_4000] = "hp-dv7-4000", }; @@ -1687,6 +1697,14 @@ static struct snd_pci_quirk stac92hd83xx "DFI LanParty", STAC_92HD83XXX_REF), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02ba, "unknown Dell", STAC_DELL_S14), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x049a, + "Dell E5520", STAC_DELL_E5520M), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x049b, + "Dell E5420", STAC_DELL_E5520M), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x04eb, + "Dell E5420m", STAC_DELL_E5520M), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x04ec, + "Dell E5520m", STAC_DELL_E5520M), SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xff00, 0x3600, "HP", STAC_92HD83XXX_HP), {} /* terminator */ From ef2b4b95a63a1d23958dcb99eb2c6898eddc87d0 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 18 Mar 2011 00:16:35 +0100 Subject: mm: PageBuddy and mapcount robustness From: Andrea Arcangeli commit ef2b4b95a63a1d23958dcb99eb2c6898eddc87d0 upstream. Change the _mapcount value indicating PageBuddy from -2 to -128 for more robusteness against page_mapcount() undeflows. Use reset_page_mapcount instead of __ClearPageBuddy in bad_page to ignore the previous retval of PageBuddy(). Signed-off-by: Andrea Arcangeli Reported-by: Hugh Dickins Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 11 +++++++++-- mm/page_alloc.c | 4 ++-- 2 files changed, 11 insertions(+), 4 deletions(-) --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -402,16 +402,23 @@ static inline void init_page_count(struc /* * PageBuddy() indicate that the page is free and in the buddy system * (see mm/page_alloc.c). + * + * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to + * -2 so that an underflow of the page_mapcount() won't be mistaken + * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very + * efficiently by most CPU architectures. */ +#define PAGE_BUDDY_MAPCOUNT_VALUE (-128) + static inline int PageBuddy(struct page *page) { - return atomic_read(&page->_mapcount) == -2; + return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE; } static inline void __SetPageBuddy(struct page *page) { VM_BUG_ON(atomic_read(&page->_mapcount) != -1); - atomic_set(&page->_mapcount, -2); + atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); } static inline void __ClearPageBuddy(struct page *page) --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -286,7 +286,7 @@ static void bad_page(struct page *page) /* Don't complain about poisoned pages */ if (PageHWPoison(page)) { - __ClearPageBuddy(page); + reset_page_mapcount(page); /* remove PageBuddy */ return; } @@ -317,7 +317,7 @@ static void bad_page(struct page *page) dump_stack(); out: /* Leave bad fields for debug, except PageBuddy could make trouble */ - __ClearPageBuddy(page); + reset_page_mapcount(page); /* remove PageBuddy */ add_taint(TAINT_BAD_PAGE); }