// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
/*
* Sometimes some memory needs to go and sit in the naughty corner
*
* Copyright 2013-2019 IBM Corp.
*/
#define pr_fmt(fmt) "FSPMEMERR: " fmt
#include <skiboot.h>
#include <opal.h>
#include <opal-msg.h>
#include <lock.h>
#include <fsp.h>
#include <errorlog.h>
/* FSP sends real address of 4K memory page. */
#define MEM_ERR_PAGE_SIZE_4K (1UL << 12)
/* Maximum number of error events to hold until linux consumes them. */
#define MERR_MAX_RECORD 1024
struct fsp_mem_err_node {
struct list_node list;
struct OpalMemoryErrorData data;
};
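/* Pool of free event nodes, and list of queued events awaiting delivery to the OS. */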
static LIST_HEAD(merr_free_list);
static LIST_HEAD(mem_error_list);
/*
* Lock protects concurrent access to merr_free_list and mem_error_list.
*/
static struct lock mem_err_lock = LOCK_UNLOCKED;
DEFINE_LOG_ENTRY(OPAL_RC_MEM_ERR_RES, OPAL_PLATFORM_ERR_EVT, OPAL_MEM_ERR,
OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL,
OPAL_NA);
DEFINE_LOG_ENTRY(OPAL_RC_MEM_ERR_DEALLOC, OPAL_PLATFORM_ERR_EVT, OPAL_MEM_ERR,
OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL,
OPAL_NA);
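/* Queue a response for the given command/sub/mod back to the FSP. */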
static bool send_response_to_fsp(u32 cmd_sub_mod)
{
struct fsp_msg *rsp;
int rc = -ENOMEM;
rsp = fsp_mkmsg(cmd_sub_mod, 0);
if (rsp)
rc = fsp_queue_msg(rsp, fsp_freemsg);
if (rc) {
fsp_freemsg(rsp);
/* XXX Generate error logs */
prerror("Error %d queueing FSP memory error reply\n", rc);
return false;
}
return true;
}
/*
* Queue up the memory error message for delivery.
*
* queue_event_for_delivery gets called from two places:
* 1) from queue_mem_err_node when a new fsp mem error is available and
* 2) from the completion callback indicating that linux has consumed a message.
*
* TODO:
* There is a chance that we may not get a free slot to queue our event
* for delivery to linux during either of the above invocations. In that case
* we end up holding events until the next fsp memory error comes in.
* We need to address this case either here OR fix up the messaging
* infrastructure to make sure at least one slot will always be available
* per message type.
*
* XXX: BenH: I changed the msg infrastructure to attempt an allocation
* in that case, at least until we clarify a bit better how
* we want to handle things.
*/
static void queue_event_for_delivery(void *data __unused, int status __unused)
{
struct fsp_mem_err_node *entry;
uint64_t *merr_data;
int rc;
lock(&mem_err_lock);
entry = list_pop(&mem_error_list, struct fsp_mem_err_node, list);
unlock(&mem_err_lock);
if (!entry)
return;
/*
* struct OpalMemoryErrorData is a well packed structure of 4 * 64 bits in
* size. Hence use a uint64_t pointer to pass the entire structure as 4
* params in the generic message format.
*/
merr_data = (uint64_t *)&entry->data;
/* queue up for delivery */
rc = opal_queue_msg(OPAL_MSG_MEM_ERR, NULL, queue_event_for_delivery,
cpu_to_be64(merr_data[0]),
cpu_to_be64(merr_data[1]),
cpu_to_be64(merr_data[2]),
cpu_to_be64(merr_data[3]));
lock(&mem_err_lock);
if (rc) {
/*
* Failed to queue up the event for delivery. No free slot
* available. There is a chance that we are trying to queue
* up multiple events at the same time. We may already have
* at least one event queued up; in that case we will be
* called again through the completion callback and we should
* be able to grab an empty slot then.
*
* For now, put this node back on mem_error_list.
*/
list_add(&mem_error_list, &entry->list);
} else
list_add(&merr_free_list, &entry->list);
unlock(&mem_err_lock);
}
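/* Take a node off the free list, copy the event into it and kick off delivery to the OS. */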
static int queue_mem_err_node(struct OpalMemoryErrorData *merr_evt)
{
struct fsp_mem_err_node *entry;
lock(&mem_err_lock);
entry = list_pop(&merr_free_list, struct fsp_mem_err_node, list);
if (!entry) {
printf("Failed to queue up memory error event.\n");
unlock(&mem_err_lock);
return -ENOMEM;
}
entry->data = *merr_evt;
list_add(&mem_error_list, &entry->list);
unlock(&mem_err_lock);
/* Queue up the event for delivery to OS. */
queue_event_for_delivery(NULL, OPAL_SUCCESS);
return 0;
}
/* Check if memory resilience event for same address already exists. */
static bool is_resilience_event_exist(u64 paddr)
{
struct fsp_mem_err_node *entry;
struct OpalMemoryErrorData *merr_evt;
int found = 0;
lock(&mem_err_lock);
list_for_each(&mem_error_list, entry, list) {
merr_evt = &entry->data;
if ((merr_evt->type == OPAL_MEM_ERR_TYPE_RESILIENCE) &&
(be64_to_cpu(merr_evt->u.resilience.physical_address_start)
== paddr)) {
found = 1;
break;
}
}
unlock(&mem_err_lock);
return !!found;
}
/*
* Handle Memory Resilience error message.
* Section 28.2 of Hypervisor to FSP Mailbox Interface Specification.
*
* The flow for a Memory Resilience Event is:
* 1. The PRD component in the FSP gets a recoverable attention from hardware
* when there is a correctable/uncorrectable memory error in a page that
* needs to be freed up.
* 2. PRD sends a Memory Resilience command to the hypervisor with the real
* address of the 4K memory page in which the error occurred.
* 3. The hypervisor acknowledges with a status immediately. Immediate
* acknowledgment doesn't require the freeing of the page to be completed.
*/
static bool handle_memory_resilience(u32 cmd_sub_mod, u64 paddr)
{
int rc = 0;
struct OpalMemoryErrorData mem_err_evt;
struct errorlog *buf;
memset(&mem_err_evt, 0, sizeof(struct OpalMemoryErrorData));
/* Check arguments */
if (paddr == 0) {
prerror("memory resilience: Invalid real address.\n");
return send_response_to_fsp(FSP_RSP_MEM_RES |
FSP_STATUS_GENERIC_ERROR);
}
/* Check if event already exist for same address. */
if (is_resilience_event_exist(paddr))
goto send_response;
/* Populate an event. */
mem_err_evt.version = OpalMemErr_V1;
mem_err_evt.type = OPAL_MEM_ERR_TYPE_RESILIENCE;
switch (cmd_sub_mod) {
case FSP_CMD_MEM_RES_CE:
/*
* Should we keep a counter for corrected errors in
* sapphire OR let linux (PowerNV) handle it?
*
* For now, send corrected errors to linux and let
* linux handle corrected error thresholding.
*/
mem_err_evt.flags |= cpu_to_be16(OPAL_MEM_CORRECTED_ERROR);
mem_err_evt.u.resilience.resil_err_type =
OPAL_MEM_RESILIENCE_CE;
break;
case FSP_CMD_MEM_RES_UE:
mem_err_evt.u.resilience.resil_err_type =
OPAL_MEM_RESILIENCE_UE;
break;
case FSP_CMD_MEM_RES_UE_SCRB:
mem_err_evt.u.resilience.resil_err_type =
OPAL_MEM_RESILIENCE_UE_SCRUB;
break;
}
mem_err_evt.u.resilience.physical_address_start = cpu_to_be64(paddr);
mem_err_evt.u.resilience.physical_address_end =
cpu_to_be64(paddr + MEM_ERR_PAGE_SIZE_4K);
/* Queue up the event and inform OS about it. */
rc = queue_mem_err_node(&mem_err_evt);
send_response:
/* Queue up an OK response to the resilience message itself */
if (!rc)
return send_response_to_fsp(FSP_RSP_MEM_RES);
else {
buf = opal_elog_create(&e_info(OPAL_RC_MEM_ERR_RES), 0);
log_append_msg(buf,
"OPAL_MEM_ERR: Cannot queue up memory "
"resilience error event to the OS");
log_add_section(buf, OPAL_ELOG_SEC_DESC);
log_append_data(buf, (char *) &mem_err_evt,
sizeof(struct OpalMemoryErrorData));
log_commit(buf);
return false;
}
}
/* Update the existing event entry if a match is found. */
static bool update_memory_deallocation_event(u64 paddr_start, u64 paddr_end)
{
struct fsp_mem_err_node *entry;
struct OpalMemoryErrorData *merr_evt;
int found = 0;
lock(&mem_err_lock);
list_for_each(&mem_error_list, entry, list) {
merr_evt = &entry->data;
if ((merr_evt->type == OPAL_MEM_ERR_TYPE_DYN_DALLOC) &&
(be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start)
== paddr_start)) {
found = 1;
if (be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end)
< paddr_end)
merr_evt->u.dyn_dealloc.physical_address_end =
cpu_to_be64(paddr_end);
break;
}
}
unlock(&mem_err_lock);
return !!found;
}
/*
* Handle dynamic memory deallocation message.
*
* When a condition occurs in which we need to do a large-scale memory
* deallocation, PRD sends the starting and ending addresses of an area of
* memory to the hypervisor. The hypervisor then needs to deallocate all
* pages between and including those addresses.
*
*/
static bool handle_memory_deallocation(u64 paddr_start, u64 paddr_end)
{
int rc = 0;
u8 err = 0;
struct OpalMemoryErrorData mem_err_evt;
struct errorlog *buf;
memset(&mem_err_evt, 0, sizeof(struct OpalMemoryErrorData));
/* Check arguments */
if ((paddr_start == 0) || (paddr_end == 0)) {
prerror("memory deallocation: Invalid "
"starting/ending real address.\n");
err = FSP_STATUS_GENERIC_ERROR;
}
/* If we had an error, send response to fsp and return */
if (err)
return send_response_to_fsp(FSP_RSP_MEM_DYN_DEALLOC | err);
/*
* FSP can send dynamic memory deallocation multiple times for the
* same address/address ranges. Hence check and update if we already
* have the same event queued.
*/
if (update_memory_deallocation_event(paddr_start, paddr_end))
goto send_response;
/* Populate a new event. */
mem_err_evt.version = OpalMemErr_V1;
mem_err_evt.type = OPAL_MEM_ERR_TYPE_DYN_DALLOC;
mem_err_evt.u.dyn_dealloc.dyn_err_type =
OPAL_MEM_DYNAMIC_DEALLOC;
mem_err_evt.u.dyn_dealloc.physical_address_start = cpu_to_be64(paddr_start);
mem_err_evt.u.dyn_dealloc.physical_address_end = cpu_to_be64(paddr_end);
/* Queue up the event and inform OS about it. */
rc = queue_mem_err_node(&mem_err_evt);
send_response:
/* Queue up an OK response to the memory deallocation message itself */
if (!rc)
return send_response_to_fsp(FSP_RSP_MEM_DYN_DEALLOC);
else {
buf = opal_elog_create(&e_info(OPAL_RC_MEM_ERR_DEALLOC), 0);
log_append_msg(buf,
"OPAL_MEM_ERR: Cannot queue up memory "
"deallocation error event to the OS");
log_add_section(buf, OPAL_ELOG_SEC_DESC);
log_append_data(buf, (char *)&mem_err_evt,
sizeof(struct OpalMemoryErrorData));
log_commit(buf);
return false;
}
}
/* Receive a memory error message and handle it. */
static bool fsp_mem_err_msg(u32 cmd_sub_mod, struct fsp_msg *msg)
{
u64 paddr_start, paddr_end;
printf("Received 0x%08ux command\n", cmd_sub_mod);
switch (cmd_sub_mod) {
case FSP_CMD_MEM_RES_CE:
case FSP_CMD_MEM_RES_UE:
case FSP_CMD_MEM_RES_UE_SCRB:
/*
* We get the memory resilience command from FSP for
* correctable/uncorrectable/scrub UE errors with the real
* address of the 4K memory page in which the error occurred.
*/
paddr_start = be64_to_cpu(*((__be64 *)&msg->data.bytes[0]));
printf("Got memory resilience error message for "
"paddr=0x%016llux\n", paddr_start);
return handle_memory_resilience(cmd_sub_mod, paddr_start);
case FSP_CMD_MEM_DYN_DEALLOC:
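/* Message carries the starting and ending real addresses of the range to deallocate. */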
paddr_start = be64_to_cpu(*((__be64 *)&msg->data.bytes[0]));
paddr_end = be64_to_cpu(*((__be64 *)&msg->data.bytes[8]));
printf("Got dynamic memory deallocation message: "
"paddr_start=0x%016llux, paddr_end=0x%016llux\n",
paddr_start, paddr_end);
return handle_memory_deallocation(paddr_start, paddr_end);
}
return false;
}
/*
* Pre-allocate memory to hold up to MERR_MAX_RECORD memory error events
* until linux consumes them.
*/
static int init_merr_free_list(uint32_t num_entries)
{
struct fsp_mem_err_node *entry;
int i;
entry = zalloc(sizeof(struct fsp_mem_err_node) * num_entries);
if (!entry)
return -ENOMEM;
for (i = 0; i < num_entries; ++i, entry++)
list_add_tail(&merr_free_list, &entry->list);
return 0;
}
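/* FSP client registered for the memory error message class. */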
static struct fsp_client fsp_mem_err_client = {
.message = fsp_mem_err_msg,
};
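/* Pre-allocate the event pool and register for FSP memory error notifications. */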
void fsp_memory_err_init(void)
{
int rc;
printf("Intializing fsp memory handling.\n");
/* If we have an FSP, register for notifications */
if (!fsp_present())
return;
/* Pre-allocate memory for MERR_MAX_RECORD records */
rc = init_merr_free_list(MERR_MAX_RECORD);
if (rc < 0)
return;
fsp_register_client(&fsp_mem_err_client, FSP_MCLASS_MEMORY_ERR);
}