From 8d91c1e411f55d7ea91b1183a2e9f8088fb4d5be Mon Sep 17 00:00:00 2001
From: André Fabian Silva Delgado
Date: Tue, 15 Dec 2015 14:52:16 -0300
Subject: Linux-libre 4.3.2-gnu

---
 arch/powerpc/platforms/powernv/eeh-powernv.c   |  12 +-
 arch/powerpc/platforms/powernv/opal-hmi.c      | 177 ++++++++++++++++++++++++-
 arch/powerpc/platforms/powernv/opal-irqchip.c  |   3 +-
 arch/powerpc/platforms/powernv/opal-power.c    | 147 +++++++++++++++++---
 arch/powerpc/platforms/powernv/opal-wrappers.S |   4 +
 arch/powerpc/platforms/powernv/opal.c          |  50 ++++++-
 arch/powerpc/platforms/powernv/pci-ioda.c      | 147 +++++++++----------
 arch/powerpc/platforms/powernv/pci.c           |  15 +--
 arch/powerpc/platforms/powernv/pci.h           |   7 +-
 arch/powerpc/platforms/powernv/powernv.h       |   6 -
 arch/powerpc/platforms/powernv/rng.c           |   2 +-
 arch/powerpc/platforms/powernv/setup.c         |  16 +--
 arch/powerpc/platforms/powernv/smp.c           |  29 +++-
 arch/powerpc/platforms/powernv/subcore.c       |   4 +-
 14 files changed, 468 insertions(+), 151 deletions(-)

(limited to 'arch/powerpc/platforms/powernv')

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 7cf0df859..3bb6acb76 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1394,11 +1394,19 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
 			 */
 			if (pnv_eeh_get_pe(hose,
 				be64_to_cpu(frozen_pe_no), pe)) {
-				/* Try best to clear it */
 				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
-					hose->global_number, frozen_pe_no);
+					hose->global_number, be64_to_cpu(frozen_pe_no));
 				pr_info("EEH: PHB location: %s\n",
 					eeh_pe_loc_get(phb_pe));
+
+				/* Dump PHB diag-data */
+				rc = opal_pci_get_phb_diag_data2(phb->opal_id,
+					phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+				if (rc == OPAL_SUCCESS)
+					pnv_pci_dump_phb_diag_data(hose,
+							phb->diag.blob);
+
+				/* Try best to clear it */
 				opal_pci_eeh_freeze_clear(phb->opal_id,
 					frozen_pe_no,
 					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index a8f49d380..d000f4e21 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -35,9 +35,134 @@ struct OpalHmiEvtNode {
 	struct list_head list;
 	struct OpalHMIEvent hmi_evt;
 };
+
+struct xstop_reason {
+	uint32_t xstop_reason;
+	const char *unit_failed;
+	const char *description;
+};
+
 static LIST_HEAD(opal_hmi_evt_list);
 static DEFINE_SPINLOCK(opal_hmi_evt_lock);
 
+static void print_core_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	int i;
+	static const struct xstop_reason xstop_reason[] = {
+		{ CORE_CHECKSTOP_IFU_REGFILE, "IFU",
+				"RegFile core check stop" },
+		{ CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_DURING_RECOV, "PC",
+				"Core checkstop during recovery" },
+		{ CORE_CHECKSTOP_ISU_REGFILE, "ISU",
+				"RegFile core check stop (mapper error)" },
+		{ CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC",
+				"Recovery in maintenance mode" },
+		{ CORE_CHECKSTOP_LSU_REGFILE, "LSU",
+				"RegFile core check stop" },
+		{ CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC",
+				"Forward Progress Error" },
+		{ CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC",
+				"Hypervisor Resource error - core check stop" },
+		{ CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC",
+				"Hang Recovery Failed (core check stop)" },
+		{ CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC",
+				"Ambiguous Hang Detected (unknown source)" },
+		{ CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC",
+				"Debug Trigger Error inject" },
+		{ CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC",
+				"Hypervisor check stop via SPRC/SPRD" },
+	};
+
+	/* Validity check */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s Unknown Core check stop.\n", level);
+		return;
+	}
+
+	printk("%s CPU PIR: %08x\n", level,
+			be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
+	for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
+		if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
+					xstop_reason[i].xstop_reason)
+			printk("%s [Unit: %-3s] %s\n", level,
+					xstop_reason[i].unit_failed,
+					xstop_reason[i].description);
+}
+
+static void print_nx_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	int i;
+	static const struct xstop_reason xstop_reason[] = {
+		{ NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine",
+					"SHM invalid state error" },
+		{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine",
+					"DMA invalid state error bit 15" },
+		{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine",
+					"DMA invalid state error bit 16" },
+		{ NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 0 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 1 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 2 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 3 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 4 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 5 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 6 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 7 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine",
+					"UE error on CRB(CSB address, CCB)" },
+		{ NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine",
+					"SUE error on CRB(CSB address, CCB)" },
+		{ NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface",
+				"CRB Kill ISN received while holding ISN with UE error" },
+	};
+
+	/* Validity check */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s Unknown NX check stop.\n", level);
+		return;
+	}
+
+	printk("%s NX checkstop on CHIP ID: %x\n", level,
+			be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
+	for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
+		if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
+					xstop_reason[i].xstop_reason)
+			printk("%s [Unit: %-3s] %s\n", level,
+					xstop_reason[i].unit_failed,
+					xstop_reason[i].description);
+}
+
+static void print_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	switch (hmi_evt->u.xstop_error.xstop_type) {
+	case CHECKSTOP_TYPE_CORE:
+		print_core_checkstop_reason(level, hmi_evt);
+		break;
+	case CHECKSTOP_TYPE_NX:
+		print_nx_checkstop_reason(level, hmi_evt);
+		break;
+	case CHECKSTOP_TYPE_UNKNOWN:
+		printk("%s Unknown Malfunction Alert.\n", level);
+		break;
+	}
+}
+
 static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
 {
 	const char *level, *sevstr, *error_info;
@@ -95,6 +220,13 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
 	    (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
 		printk("%s TFMR: %016llx\n", level,
 				be64_to_cpu(hmi_evt->tfmr));
+
+	if (hmi_evt->version < OpalHMIEvt_V2)
+		return;
+
+	/* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
+	if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
+		print_checkstop_reason(level, hmi_evt);
 }
 
 static void hmi_event_handler(struct work_struct *work)
@@ -103,6 +235,8 @@
 	struct OpalHMIEvent *hmi_evt;
 	struct OpalHmiEvtNode *msg_node;
 	uint8_t disposition;
+	struct opal_msg msg;
+	int unrecoverable = 0;
 
 	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
 	while (!list_empty(&opal_hmi_evt_list)) {
@@ -118,14 +252,53 @@
 
 		/*
 		 * Check if HMI event has been recovered or not. If not
-		 * then we can't continue, invoke panic.
+		 * then kernel can't continue, we need to panic.
+		 * But before we do that, display all the HMI event
+		 * available on the list and set unrecoverable flag to 1.
 		 */
 		if (disposition != OpalHMI_DISPOSITION_RECOVERED)
-			panic("Unrecoverable HMI exception");
+			unrecoverable = 1;
 
 		spin_lock_irqsave(&opal_hmi_evt_lock, flags);
 	}
 	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+	if (unrecoverable) {
+		int ret;
+
+		/* Pull all HMI events from OPAL before we panic. */
+		while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
+			u32 type;
+
+			type = be32_to_cpu(msg.msg_type);
+
+			/* skip if not HMI event */
+			if (type != OPAL_MSG_HMI_EVT)
+				continue;
+
+			/* HMI event info starts from param[0] */
+			hmi_evt = (struct OpalHMIEvent *)&msg.params[0];
+			print_hmi_event_info(hmi_evt);
+		}
+
+		/*
+		 * Unrecoverable HMI exception. We need to inform BMC/OCC
+		 * about this error so that it can collect relevant data
+		 * for error analysis before rebooting.
+		 */
+		ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
+			"Unrecoverable HMI exception");
+		if (ret == OPAL_UNSUPPORTED) {
+			pr_emerg("Reboot type %d not supported\n",
+					OPAL_REBOOT_PLATFORM_ERROR);
+		}
+
+		/*
+		 * Fall through and panic if opal_cec_reboot2() returns
+		 * OPAL_UNSUPPORTED.
+		 */
+		panic("Unrecoverable HMI exception");
+	}
 }
 
 static DECLARE_WORK(hmi_event_work, hmi_event_handler);
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index e2e7d75f5..2c91ee780 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -134,7 +134,8 @@ static void opal_handle_irq_work(struct irq_work *work)
 	opal_handle_events(be64_to_cpu(last_outstanding_events));
 }
 
-static int opal_event_match(struct irq_domain *h, struct device_node *node)
+static int opal_event_match(struct irq_domain *h, struct device_node *node,
+			    enum irq_domain_bus_token bus_token)
 {
 	return h->of_node == node;
 }
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
index ac46c2c24..58dc33082 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -9,9 +9,12 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "opal-power: " fmt
+
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -19,30 +22,116 @@
 #define SOFT_OFF 0x00
 #define SOFT_REBOOT 0x01
 
+/* Detect EPOW event */
+static bool detect_epow(void)
+{
+	u16 epow;
+	int i, rc;
+	__be16 epow_classes;
+	__be16 opal_epow_status[OPAL_SYSEPOW_MAX] = {0};
+
+	/*
+	 * Check for EPOW event. Kernel sends supported EPOW classes info
+	 * to OPAL. OPAL returns EPOW info along with classes present.
+	 */
+	epow_classes = cpu_to_be16(OPAL_SYSEPOW_MAX);
+	rc = opal_get_epow_status(opal_epow_status, &epow_classes);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("Failed to get EPOW event information\n");
+		return false;
+	}
+
+	/* Look for EPOW events present */
+	for (i = 0; i < be16_to_cpu(epow_classes); i++) {
+		epow = be16_to_cpu(opal_epow_status[i]);
+
+		/* Filter events which do not need shutdown. */
+		if (i == OPAL_SYSEPOW_POWER)
+			epow &= ~(OPAL_SYSPOWER_CHNG | OPAL_SYSPOWER_FAIL |
+					OPAL_SYSPOWER_INCL);
+		if (epow)
+			return true;
+	}
+
+	return false;
+}
+
+/* Check for existing EPOW, DPO events */
+static bool poweroff_pending(void)
+{
+	int rc;
+	__be64 opal_dpo_timeout;
+
+	/* Check for DPO event */
+	rc = opal_get_dpo_status(&opal_dpo_timeout);
+	if (rc == OPAL_SUCCESS) {
+		pr_info("Existing DPO event detected.\n");
+		return true;
+	}
+
+	/* Check for EPOW event */
+	if (detect_epow()) {
+		pr_info("Existing EPOW event detected.\n");
+		return true;
+	}
+
+	return false;
+}
+
+/* OPAL power-control events notifier */
 static int opal_power_control_event(struct notifier_block *nb,
-				    unsigned long msg_type, void *msg)
+					unsigned long msg_type, void *msg)
 {
-	struct opal_msg *power_msg = msg;
 	uint64_t type;
 
-	type = be64_to_cpu(power_msg->params[0]);
-
-	switch (type) {
-	case SOFT_REBOOT:
-		pr_info("OPAL: reboot requested\n");
-		orderly_reboot();
+	switch (msg_type) {
+	case OPAL_MSG_EPOW:
+		if (detect_epow()) {
+			pr_info("EPOW msg received. Powering off system\n");
+			orderly_poweroff(true);
+		}
 		break;
-	case SOFT_OFF:
-		pr_info("OPAL: poweroff requested\n");
+	case OPAL_MSG_DPO:
+		pr_info("DPO msg received. Powering off system\n");
 		orderly_poweroff(true);
 		break;
+	case OPAL_MSG_SHUTDOWN:
+		type = be64_to_cpu(((struct opal_msg *)msg)->params[0]);
+		switch (type) {
+		case SOFT_REBOOT:
+			pr_info("Reboot requested\n");
+			orderly_reboot();
+			break;
+		case SOFT_OFF:
+			pr_info("Poweroff requested\n");
+			orderly_poweroff(true);
+			break;
+		default:
+			pr_err("Unknown power-control type %llu\n", type);
+		}
+		break;
 	default:
-		pr_err("OPAL: power control type unexpected %016llx\n", type);
+		pr_err("Unknown OPAL message type %lu\n", msg_type);
 	}
 
 	return 0;
 }
 
+/* OPAL EPOW event notifier block */
+static struct notifier_block opal_epow_nb = {
+	.notifier_call = opal_power_control_event,
+	.next = NULL,
+	.priority = 0,
+};
+
+/* OPAL DPO event notifier block */
+static struct notifier_block opal_dpo_nb = {
+	.notifier_call = opal_power_control_event,
+	.next = NULL,
+	.priority = 0,
+};
+
+/* OPAL power-control event notifier block */
 static struct notifier_block opal_power_control_nb = {
 	.notifier_call = opal_power_control_event,
 	.next = NULL,
@@ -51,16 +140,40 @@ static struct notifier_block opal_power_control_nb = {
 
 static int __init opal_power_control_init(void)
 {
-	int ret;
+	int ret, supported = 0;
+	struct device_node *np;
 
+	/* Register OPAL power-control events notifier */
 	ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
-					     &opal_power_control_nb);
-	if (ret) {
-		pr_err("%s: Can't register OPAL event notifier (%d)\n",
-				__func__, ret);
-		return ret;
+						&opal_power_control_nb);
+	if (ret)
+		pr_err("Failed to register SHUTDOWN notifier, ret = %d\n", ret);
+
+	/* Determine OPAL EPOW, DPO support */
+	np = of_find_node_by_path("/ibm,opal/epow");
+	if (np) {
+		supported = of_device_is_compatible(np, "ibm,opal-v3-epow");
+		of_node_put(np);
 	}
 
+	if (!supported)
+		return 0;
+	pr_info("OPAL EPOW, DPO support detected.\n");
+
+	/* Register EPOW event notifier */
+	ret = opal_message_notifier_register(OPAL_MSG_EPOW, &opal_epow_nb);
+	if (ret)
+		pr_err("Failed to register EPOW notifier, ret = %d\n", ret);
+
+	/* Register DPO event notifier */
+	ret = opal_message_notifier_register(OPAL_MSG_DPO, &opal_dpo_nb);
+	if (ret)
+		pr_err("Failed to register DPO notifier, ret = %d\n", ret);
+
+	/* Check for any pending EPOW or DPO events. */
+	if (poweroff_pending())
+		orderly_poweroff(true);
+
 	return 0;
 }
 
 machine_subsys_initcall(powernv, opal_power_control_init);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index d6a7b8252..b7a464fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -202,6 +202,7 @@ OPAL_CALL(opal_rtc_read, OPAL_RTC_READ);
 OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE);
 OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN);
 OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT);
+OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2);
 OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM);
 OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM);
 OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT);
@@ -249,6 +250,7 @@ OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
 OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
 OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
 OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
+OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS);
 OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
 OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
 OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
@@ -297,3 +299,5 @@ OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
 OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
 OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
 OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
+OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
+OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f084afa0e..4296d55e8 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -441,6 +441,7 @@ static int opal_recover_mce(struct pt_regs *regs,
 int opal_machine_check(struct pt_regs *regs)
 {
 	struct machine_check_event evt;
+	int ret;
 
 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 		return 0;
@@ -455,6 +456,43 @@ int opal_machine_check(struct pt_regs *regs)
 
 	if (opal_recover_mce(regs, &evt))
 		return 1;
+
+	/*
+	 * Unrecovered machine check, we are heading to panic path.
+	 *
+	 * We may have hit this MCE in very early stage of kernel
+	 * initialization even before opal-prd has started running. If
+	 * this is the case then this MCE error may go un-noticed or
+	 * un-analyzed if we go down panic path. We need to inform
+	 * BMC/OCC about this error so that they can collect relevant
+	 * data for error analysis before rebooting.
+	 * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so.
+	 * This function may not return on BMC based system.
+	 */
+	ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
+			"Unrecoverable Machine Check exception");
+	if (ret == OPAL_UNSUPPORTED) {
+		pr_emerg("Reboot type %d not supported\n",
+					OPAL_REBOOT_PLATFORM_ERROR);
+	}
+
+	/*
+	 * We reached here. There can be three possibilities:
+	 * 1. We are running on a firmware level that do not support
+	 *    opal_cec_reboot2()
+	 * 2. We are running on a firmware level that do not support
+	 *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
+	 * 3. We are running on FSP based system that does not need opal
+	 *    to trigger checkstop explicitly for error analysis. The FSP
+	 *    PRD component would have already got notified about this
+	 *    error through other channels.
+	 *
+	 * If hardware marked this as an unrecoverable MCE, we are
+	 * going to panic anyway. Even if it didn't, it's not safe to
+	 * continue at this point, so we should explicitly panic.
+	 */
+
+	panic("PowerNV Unrecovered Machine Check");
 	return 0;
 }
 
@@ -648,7 +686,7 @@ static void opal_init_heartbeat(void)
 
 static int __init opal_init(void)
 {
-	struct device_node *np, *consoles;
+	struct device_node *np, *consoles, *leds;
 	int rc;
 
 	opal_node = of_find_node_by_path("/ibm,opal");
@@ -689,6 +727,13 @@ static int __init opal_init(void)
 	/* Setup a heatbeat thread if requested by OPAL */
 	opal_init_heartbeat();
 
+	/* Create leds platform devices */
+	leds = of_find_node_by_path("/ibm,opal/leds");
+	if (leds) {
+		of_platform_device_create(leds, "opal_leds", NULL);
+		of_node_put(leds);
+	}
+
 	/* Create "opal" kobject under /sys/firmware */
 	rc = opal_sysfs_init();
 	if (rc == 0) {
@@ -841,3 +886,6 @@ EXPORT_SYMBOL_GPL(opal_rtc_write);
 EXPORT_SYMBOL_GPL(opal_tpo_read);
 EXPORT_SYMBOL_GPL(opal_tpo_write);
 EXPORT_SYMBOL_GPL(opal_i2c_request);
+/* Export these symbols for PowerNV LED class driver */
+EXPORT_SYMBOL_GPL(opal_leds_get_ind);
+EXPORT_SYMBOL_GPL(opal_leds_set_ind);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8b64f89e6..414fd1a00 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -140,11 +140,9 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 		return;
 	}
 
-	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) {
-		pr_warn("%s: PE %d was assigned on PHB#%x\n",
-			__func__, pe_no, phb->hose->global_number);
-		return;
-	}
+	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
+		pr_debug("%s: PE %d was reserved on PHB#%x\n",
+			 __func__, pe_no, phb->hose->global_number);
 
 	phb->ioda.pe_array[pe_no].phb = phb;
 	phb->ioda.pe_array[pe_no].pe_number = pe_no;
@@ -231,61 +229,60 @@ fail:
 	return -EIO;
 }
 
-static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb)
+static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
+					 unsigned long *pe_bitmap)
 {
-	resource_size_t sgsz = phb->ioda.m64_segsize;
-	struct pci_dev *pdev;
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
 	struct resource *r;
-	int base, step, i;
-
-	/*
-	 * Root bus always has full M64 range and root port has
-	 * M64 range used in reality. So we're checking root port
-	 * instead of root bus.
-	 */
-	list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
-		for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
-			r = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
-			if (!r->parent ||
-			    !pnv_pci_is_mem_pref_64(r->flags))
-				continue;
+	resource_size_t base, sgsz, start, end;
+	int segno, i;
+
+	base = phb->ioda.m64_base;
+	sgsz = phb->ioda.m64_segsize;
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		r = &pdev->resource[i];
+		if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+			continue;
 
-			base = (r->start - phb->ioda.m64_base) / sgsz;
-			for (step = 0; step < resource_size(r) / sgsz; step++)
-				pnv_ioda_reserve_pe(phb, base + step);
+		start = _ALIGN_DOWN(r->start - base, sgsz);
+		end = _ALIGN_UP(r->end - base, sgsz);
+		for (segno = start / sgsz; segno < end / sgsz; segno++) {
+			if (pe_bitmap)
+				set_bit(segno, pe_bitmap);
+			else
+				pnv_ioda_reserve_pe(phb, segno);
 		}
 	}
 }
 
-static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
-				 struct pci_bus *bus, int all)
+static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus,
+				     unsigned long *pe_bitmap,
+				     bool all)
 {
-	resource_size_t segsz = phb->ioda.m64_segsize;
 	struct pci_dev *pdev;
-	struct resource *r;
+
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap);
+
+		if (all && pdev->subordinate)
+			pnv_ioda2_reserve_m64_pe(pdev->subordinate,
+						 pe_bitmap, all);
+	}
+}
+
+static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct pnv_phb *phb = hose->private_data;
 	struct pnv_ioda_pe *master_pe, *pe;
 	unsigned long size, *pe_alloc;
-	bool found;
-	int start, i, j;
+	int i;
 
 	/* Root bus shouldn't use M64 */
 	if (pci_is_root_bus(bus))
 		return IODA_INVALID_PE;
 
-	/* We support only one M64 window on each bus */
-	found = false;
-	pci_bus_for_each_resource(bus, r, i) {
-		if (r && r->parent &&
-		    pnv_pci_is_mem_pref_64(r->flags)) {
-			found = true;
-			break;
-		}
-	}
-
-	/* No M64 window found ? */
-	if (!found)
-		return IODA_INVALID_PE;
-
 	/* Allocate bitmap */
 	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
 	pe_alloc = kzalloc(size, GFP_KERNEL);
@@ -295,35 +292,8 @@ static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
 		return IODA_INVALID_PE;
 	}
 
-	/*
-	 * Figure out reserved PE numbers by the PE
-	 * the its child PEs.
-	 */
-	start = (r->start - phb->ioda.m64_base) / segsz;
-	for (i = 0; i < resource_size(r) / segsz; i++)
-		set_bit(start + i, pe_alloc);
-
-	if (all)
-		goto done;
-
-	/*
-	 * If the PE doesn't cover all subordinate buses,
-	 * we need subtract from reserved PEs for children.
-	 */
-	list_for_each_entry(pdev, &bus->devices, bus_list) {
-		if (!pdev->subordinate)
-			continue;
-
-		pci_bus_for_each_resource(pdev->subordinate, r, i) {
-			if (!r || !r->parent ||
-			    !pnv_pci_is_mem_pref_64(r->flags))
-				continue;
-
-			start = (r->start - phb->ioda.m64_base) / segsz;
-			for (j = 0; j < resource_size(r) / segsz ; j++)
-				clear_bit(start + j, pe_alloc);
-		}
-	}
+	/* Figure out reserved PE numbers by the PE */
+	pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all);
 
 	/*
 	 * the current bus might not own M64 window and that's all
@@ -339,7 +309,6 @@
 	 * Figure out the master PE and put all slave PEs to master
 	 * PE's list to form compound PE.
 	 */
-done:
 	master_pe = NULL;
 	i = -1;
 	while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
@@ -653,7 +622,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
 		pdev = pe->pdev->bus->self;
 #ifdef CONFIG_PCI_IOV
 	else if (pe->flags & PNV_IODA_PE_VF)
-		pdev = pe->parent_dev->bus->self;
+		pdev = pe->parent_dev;
 #endif /* CONFIG_PCI_IOV */
 	while (pdev) {
 		struct pci_dn *pdn = pci_get_pdn(pdev);
@@ -732,7 +701,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 		parent = parent->bus->self;
 	}
 
-	opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number,
+	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
 				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
 
 	/* Disassociate PE in PELT */
@@ -946,8 +915,9 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 		res2 = *res;
 		res->start += size * offset;
 
-		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n",
-			 i, &res2, res, num_vfs, offset);
+		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
+			 i, &res2, res, (offset > 0) ? "En" : "Dis",
+			 num_vfs, offset);
 		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
 	}
 	return 0;
}
@@ -1050,7 +1020,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
  * subordinate PCI devices and buses. The second type of PE is normally
  * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
  */
-static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
+static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 {
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	struct pnv_phb *phb = hose->private_data;
@@ -1059,7 +1029,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 
 	/* Check if PE is determined by M64 */
 	if (phb->pick_m64_pe)
-		pe_num = phb->pick_m64_pe(phb, bus, all);
+		pe_num = phb->pick_m64_pe(bus, all);
 
 	/* The PE number isn't pinned by M64 */
 	if (pe_num == IODA_INVALID_PE)
@@ -1117,12 +1087,12 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus)
 {
 	struct pci_dev *dev;
 
-	pnv_ioda_setup_bus_PE(bus, 0);
+	pnv_ioda_setup_bus_PE(bus, false);
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		if (dev->subordinate) {
 			if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
-				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
+				pnv_ioda_setup_bus_PE(dev->subordinate, true);
 			else
 				pnv_ioda_setup_PEs(dev->subordinate);
 		}
@@ -1147,7 +1117,7 @@ static void pnv_pci_ioda_setup_PEs(void)
 
 		/* M64 layout might affect PE allocation */
 		if (phb->reserve_m64_pe)
-			phb->reserve_m64_pe(phb);
+			phb->reserve_m64_pe(hose->bus, NULL, true);
 
 		pnv_ioda_setup_PEs(hose->bus);
 	}
@@ -1590,6 +1560,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 
 	pe = &phb->ioda.pe_array[pdn->pe_number];
 	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
+	set_dma_offset(&pdev->dev, pe->tce_bypass_base);
 	set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
 	/*
 	 * Note: iommu_add_device() will fail here as
@@ -1620,19 +1591,18 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 	if (bypass) {
 		dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
 		set_dma_ops(&pdev->dev, &dma_direct_ops);
-		set_dma_offset(&pdev->dev, pe->tce_bypass_base);
 	} else {
 		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
 		set_dma_ops(&pdev->dev, &dma_iommu_ops);
-		set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
 	}
 	*pdev->dev.dma_mask = dma_mask;
 	return 0;
 }
 
-static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
-					      struct pci_dev *pdev)
+static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
 {
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	struct pnv_ioda_pe *pe;
 	u64 end, mask;
@@ -1659,6 +1629,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
+		set_dma_offset(&dev->dev, pe->tce_bypass_base);
 		iommu_add_device(&dev->dev);
 
 		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
@@ -3071,6 +3042,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
 	.window_alignment = pnv_pci_window_alignment,
 	.reset_secondary_bus = pnv_pci_reset_secondary_bus,
 	.dma_set_mask = pnv_pci_ioda_dma_set_mask,
+	.dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
 	.shutdown = pnv_pci_ioda_shutdown,
 };
 
@@ -3217,7 +3189,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 
 	/* Setup TCEs */
 	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
-	phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;
 
 	/* Setup MSI support */
 	pnv_pci_init_ioda_msis(phb);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index fd16f86e5..f2dd77234 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -61,7 +61,7 @@ int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	if (pdev->no_64bit_msi && !phb->msi32_support)
 		return -ENODEV;
 
-	list_for_each_entry(entry, &pdev->msi_list, list) {
+	for_each_pci_msi_entry(entry, pdev) {
 		if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
 			pr_warn("%s: Supports only 64-bit MSIs\n",
 				pci_name(pdev));
@@ -104,7 +104,7 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev)
 	if (WARN_ON(!phb))
 		return;
 
-	list_for_each_entry(entry, &pdev->msi_list, list) {
+	for_each_pci_msi_entry(entry, pdev) {
 		if (entry->irq == NO_IRQ)
 			continue;
 		hwirq = virq_to_hw(entry->irq);
@@ -762,17 +762,6 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 		phb->dma_dev_setup(phb, pdev);
 }
 
-u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
-{
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
-
-	if (phb && phb->dma_get_required_mask)
-		return phb->dma_get_required_mask(phb, pdev);
-
-	return __dma_get_required_mask(&pdev->dev);
-}
-
 void pnv_pci_shutdown(void)
 {
 	struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8ef2d28ad..c8ff50e90 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -105,13 +105,12 @@ struct pnv_phb {
 			 unsigned int hwirq, unsigned int virq,
 			 unsigned int is_64, struct msi_msg *msg);
 	void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
-	u64 (*dma_get_required_mask)(struct pnv_phb *phb,
-				     struct pci_dev *pdev);
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
 	int (*init_m64)(struct pnv_phb *phb);
-	void (*reserve_m64_pe)(struct pnv_phb *phb);
-	int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
+	void (*reserve_m64_pe)(struct pci_bus *bus,
+			       unsigned long *pe_bitmap, bool all);
+	int (*pick_m64_pe)(struct pci_bus *bus, bool all);
 	int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
 	void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
 	int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 9269e30e4..6dbc0a1da 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -12,15 +12,9 @@ struct pci_dev;
 #ifdef CONFIG_PCI
 extern void pnv_pci_init(void);
 extern void pnv_pci_shutdown(void);
-extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev);
 #else
 static inline void pnv_pci_init(void) { }
 static inline void pnv_pci_shutdown(void) { }
-
-static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
-{
-	return 0;
-}
 #endif
 
 extern u32 pnv_get_supported_cpuidle_states(void);
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 6eb808ff6..5dcbdea1a 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -128,7 +128,7 @@ static __init int rng_create(struct device_node *dn)
 
 	pr_info_once("Registering arch random hook.\n");
 
-	ppc_md.get_random_long = powernv_get_random_long;
+	ppc_md.get_random_seed = powernv_get_random_long;
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 53737e019..685b3cbe1 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -165,14 +165,6 @@ static void pnv_progress(char *s, unsigned short hex)
 {
 }
 
-static u64 pnv_dma_get_required_mask(struct device *dev)
-{
-	if (dev_is_pci(dev))
-		return pnv_pci_dma_get_required_mask(to_pci_dev(dev));
-
-	return __dma_get_required_mask(dev);
-}
-
 static void pnv_shutdown(void)
 {
 	/* Let the PCI code clear up IODA tables */
@@ -243,6 +235,13 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 	} else {
 		/* Primary waits for the secondaries to have reached OPAL */
 		pnv_kexec_wait_secondaries_down();
+
+		/*
+		 * We might be running as little-endian - now that interrupts
+		 * are disabled, reset the HILE bit to big-endian so we don't
+		 * take interrupts in the wrong endian later
+		 */
+		opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
 	}
 }
 #endif /* CONFIG_KEXEC */
@@ -314,7 +313,6 @@ define_machine(powernv) {
 	.machine_shutdown	= pnv_shutdown,
 	.power_save		= power7_idle,
 	.calibrate_decr		= generic_calibrate_decr,
-	.dma_get_required_mask	= pnv_dma_get_required_mask,
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 8f70ba681..ca264833e 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -171,7 +171,26 @@ static void pnv_smp_cpu_kill_self(void)
 	 * so clear LPCR:PECE1. We keep PECE2 enabled.
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
+
+	/*
+	 * Hard-disable interrupts, and then clear irq_happened flags
+	 * that we can safely ignore while off-line, since they
+	 * are for things for which we do no processing when off-line
+	 * (or in the case of HMI, all the processing we need to do
+	 * is done in lower-level real-mode code).
+	 */
+	hard_irq_disable();
+	local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI);
+
 	while (!generic_check_cpu_restart(cpu)) {
+		/*
+		 * Clear IPI flag, since we don't handle IPIs while
+		 * offline, except for those when changing micro-threading
+		 * mode, which are handled explicitly below, and those
+		 * for coming online, which are handled via
+		 * generic_check_cpu_restart() calls.
+		 */
+		kvmppc_set_host_ipi(cpu, 0);
 
 		ppc64_runlatch_off();
@@ -196,20 +215,20 @@ static void pnv_smp_cpu_kill_self(void)
 		 * having finished executing in a KVM guest, then srr1
 		 * contains 0.
 		 */
-		if ((srr1 & wmask) == SRR1_WAKEEE) {
+		if (((srr1 & wmask) == SRR1_WAKEEE) ||
+		    (local_paca->irq_happened & PACA_IRQ_EE)) {
 			icp_native_flush_interrupt();
-			local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
-			smp_mb();
 		} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
 			unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 			asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
-			kvmppc_set_host_ipi(cpu, 0);
 		}
+		local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL);
+		smp_mb();
 
 		if (cpu_core_split_required())
 			continue;
 
-		if (!generic_check_cpu_restart(cpu))
+		if (srr1 && !generic_check_cpu_restart(cpu))
 			DBG("CPU%d Unexpected exit while offline !\n", cpu);
 	}
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index f60f80ada..503a73f59 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -190,7 +190,7 @@ static void unsplit_core(void)
 
 	hid0 = mfspr(SPRN_HID0);
 	hid0 &= ~HID0_POWER8_DYNLPARDIS;
-	mtspr(SPRN_HID0, hid0);
+	update_power8_hid0(hid0);
 	update_hid_in_slw(hid0);
 
 	while (mfspr(SPRN_HID0) & mask)
@@ -227,7 +227,7 @@ static void split_core(int new_mode)
 
 	/* Write new mode */
 	hid0 = mfspr(SPRN_HID0);
 	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
-	mtspr(SPRN_HID0, hid0);
+	update_power8_hid0(hid0);
 	update_hid_in_slw(hid0);
 
 	/* Wait for it to happen */
--
cgit v1.2.3-54-g00ecf