[loong64] Add initial support for LoongArch64

Add support for building a LoongArch64 Linux userspace binary.
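
With a LoongArch64 cross-toolchain installed, the result can be
exercised via the usual Linux userspace build target, for example
something along the lines of
"make CROSS_COMPILE=loongarch64-linux-gnu- bin-loong64-linux/tests.linux"
(the exact target name and toolchain prefix are illustrative and depend
on the local setup).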

Signed-off-by: Xiaotian Wu <wuxiaotian@loongson.cn>
Modified-by: Michael Brown <mcb30@ipxe.org>
Signed-off-by: Michael Brown <mcb30@ipxe.org>
diff --git a/src/arch/loong64/Makefile b/src/arch/loong64/Makefile
new file mode 100644
index 0000000..f2dfc76
--- /dev/null
+++ b/src/arch/loong64/Makefile
@@ -0,0 +1,26 @@
+# Assembler section type character
+#
+ASM_TCHAR	:= @
+ASM_TCHAR_OPS	:= @
+
+# LoongArch64-specific flags
+#
+CFLAGS		+= -fstrength-reduce -fomit-frame-pointer
+CFLAGS		+= -falign-jumps=1 -falign-loops=1 -falign-functions=1
+
+# Check if -mno-explicit-relocs is valid
+ifeq ($(CCTYPE),gcc)
+MNER_TEST = $(CC) -mno-explicit-relocs -x c -c /dev/null -o /dev/null >/dev/null 2>&1
+MNER_FLAGS := $(shell $(MNER_TEST) && $(ECHO) '-mno-explicit-relocs')
+WORKAROUND_CFLAGS += $(MNER_FLAGS)
+endif
+
+# EFI requires -fshort-wchar, and nothing else currently uses wchar_t
+CFLAGS		+= -fshort-wchar
+
+# LoongArch64-specific directories containing source files
+SRCDIRS		+= arch/loong64/core
+
+# Include platform-specific Makefile
+MAKEDEPS	+= arch/loong64/Makefile.$(PLATFORM)
+include arch/loong64/Makefile.$(PLATFORM)
diff --git a/src/arch/loong64/Makefile.linux b/src/arch/loong64/Makefile.linux
new file mode 100644
index 0000000..992b371
--- /dev/null
+++ b/src/arch/loong64/Makefile.linux
@@ -0,0 +1,10 @@
+# -*- makefile -*- : Force emacs to use Makefile mode
+
+# Starting virtual address
+#
+LDFLAGS += -Ttext=0x120000000
+
+# Include generic Linux Makefile
+#
+MAKEDEPS += Makefile.linux
+include Makefile.linux
diff --git a/src/arch/loong64/core/loong64_bigint.c b/src/arch/loong64/core/loong64_bigint.c
new file mode 100644
index 0000000..f42b861
--- /dev/null
+++ b/src/arch/loong64/core/loong64_bigint.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
+ * Copyright (c) 2023, Xiaotian Wu <wuxiaotian@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ * You can also choose to distribute this program under the terms of
+ * the Unmodified Binary Distribution Licence (as given in the file
+ * COPYING.UBDL), provided that you have satisfied its requirements.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <stdint.h>
+#include <string.h>
+#include <ipxe/bigint.h>
+
+/** @file
+ *
+ * Big integer support
+ */
+
+/**
+ * Multiply big integers
+ *
+ * @v multiplicand0	Element 0 of big integer to be multiplied
+ * @v multiplier0	Element 0 of big integer to be multiplied
+ * @v result0		Element 0 of big integer to hold result
+ * @v size		Number of elements
+ */
+void bigint_multiply_raw ( const uint64_t *multiplicand0,
+			   const uint64_t *multiplier0,
+			   uint64_t *result0, unsigned int size ) {
+	const bigint_t ( size ) __attribute__ (( may_alias )) *multiplicand =
+		( ( const void * ) multiplicand0 );
+	const bigint_t ( size ) __attribute__ (( may_alias )) *multiplier =
+		( ( const void * ) multiplier0 );
+	bigint_t ( size * 2 ) __attribute__ (( may_alias )) *result =
+		( ( void * ) result0 );
+	unsigned int i;
+	unsigned int j;
+	uint64_t multiplicand_element;
+	uint64_t multiplier_element;
+	uint64_t *result_elements;
+	uint64_t discard_low;
+	uint64_t discard_high;
+	uint64_t discard_temp_low;
+	uint64_t discard_temp_high;
+
+	/* Zero result */
+	memset ( result, 0, sizeof ( *result ) );
+
+	/* Multiply integers one element at a time */
+	for ( i = 0 ; i < size ; i++ ) {
+		multiplicand_element = multiplicand->element[i];
+		for ( j = 0 ; j < size ; j++ ) {
+			multiplier_element = multiplier->element[j];
+			result_elements = &result->element[ i + j ];
+			/* Perform a single multiply, and add the
+			 * resulting double-element into the result,
+			 * carrying as necessary.  The carry can
+			 * never overflow beyond the end of the
+			 * result, since:
+			 *
+			 *     a < 2^{n}, b < 2^{n} => ab < 2^{2n}
+			 */
+			__asm__ __volatile__ ( "mul.d   %1, %6, %7\n\t"
+					       "mulh.du %2, %6, %7\n\t"
+
+					       "ld.d    %3, %0, 0\n\t"
+					       "ld.d    %4, %0, 8\n\t"
+
+					       "add.d   %3, %3, %1\n\t"
+					       "sltu    $t0, %3, %1\n\t"
+
+					       "add.d   %4, %4, %2\n\t"
+					       "sltu    $t1, %4, %2\n\t"
+
+					       "add.d   %4, %4, $t0\n\t"
+					       "sltu    $t0, %4, $t0\n\t"
+					       "or      $t0, $t0, $t1\n\t"
+
+					       "st.d    %3,  %0, 0\n\t"
+					       "st.d    %4,  %0, 8\n\t"
+
+					       "addi.d  %0,  %0, 16\n\t"
+					       "beqz    $t0, 2f\n"
+					       "1:\n\t"
+					       "ld.d    %3,  %0, 0\n\t"
+					       "add.d   %3,  %3, $t0\n\t"
+					       "sltu    $t0, %3, $t0\n\t"
+					       "st.d    %3,  %0, 0\n\t"
+					       "addi.d  %0, %0, 8\n\t"
+					       "bnez    $t0, 1b\n"
+					       "2:"
+					       : "+r" ( result_elements ),
+						 "=&r" ( discard_low ),
+						 "=&r" ( discard_high ),
+						 "=r" ( discard_temp_low ),
+						 "=r" ( discard_temp_high ),
+						 "+m" ( *result )
+					       : "r" ( multiplicand_element ),
+						 "r" ( multiplier_element )
+					       : "t0", "t1" );
+		}
+	}
+}
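
For reference, each inner iteration of the inline assembly above is
equivalent to the following portable C sketch (illustrative only and not
part of the build; it uses GCC's unsigned __int128 to stand in for the
64x64-to-128-bit multiply provided by mul.d/mulh.du, and the helper name
is invented):

#include <stdint.h>

/* Accumulate ( multiplicand * multiplier ) into result[index] and
 * result[index + 1], then ripple any remaining carry upwards, as the
 * "1:"/"2:" loop in the assembly does.
 */
static void accumulate_sketch ( uint64_t *result, unsigned int index,
				uint64_t multiplicand,
				uint64_t multiplier ) {
	unsigned __int128 product =
		( ( unsigned __int128 ) multiplicand * multiplier );
	uint64_t low = ( uint64_t ) product;
	uint64_t high = ( uint64_t ) ( product >> 64 );
	uint64_t carry;
	uint64_t overflow;

	/* Add the low half into the first element */
	result[index] += low;
	carry = ( result[index] < low );

	/* Add the high half and the low-half carry into the second
	 * element; at most one of these two additions can overflow.
	 */
	result[index + 1] += high;
	overflow = ( result[index + 1] < high );
	result[index + 1] += carry;
	overflow |= ( result[index + 1] < carry );

	/* Ripple any remaining carry into higher elements; as noted
	 * above, this can never run off the end of the result.
	 */
	for ( index += 2 ; overflow ; index++ ) {
		result[index] += overflow;
		overflow = ( result[index] < overflow );
	}
}
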
diff --git a/src/arch/loong64/core/loong64_string.c b/src/arch/loong64/core/loong64_string.c
new file mode 100644
index 0000000..941b7e2
--- /dev/null
+++ b/src/arch/loong64/core/loong64_string.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
+ * Copyright (c) 2023, Xiaotian Wu <wuxiaotian@loongson.cn>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ * You can also choose to distribute this program under the terms of
+ * the Unmodified Binary Distribution Licence (as given in the file
+ * COPYING.UBDL), provided that you have satisfied its requirements.
+ */
+
+/** @file
+ *
+ * Optimised string operations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <string.h>
+
+/**
+ * Copy memory area
+ *
+ * @v dest		Destination address
+ * @v src		Source address
+ * @v len		Length
+ */
+void loong64_memcpy ( void *dest, const void *src, size_t len ) {
+	void *discard_dest;
+	void *discard_end;
+	const void *discard_src;
+	size_t discard_offset;
+	unsigned long discard_data;
+	unsigned long discard_low;
+	unsigned long discard_high;
+
+	/* If length is too short, then just copy individual bytes.
+	 */
+	if ( len < 16 ) {
+		__asm__ __volatile__ ( "beqz %0, 2f\n\t"
+				       "\n1:\n\t"
+				       "addi.d %0, %0, -1\n\t"
+				       "ldx.b %1, %3, %0\n\t"
+				       "stx.b %1, %2, %0\n\t"
+				       "bnez %0, 1b\n\t"
+				       "\n2:\n\t"
+				       : "=&r" ( discard_offset ),
+					 "=&r" ( discard_data )
+				       : "r" ( dest ), "r" ( src ), "0" ( len )
+				       : "memory", "t0" );
+		return;
+	}
+
+	/* Copy 16 bytes at a time: one initial
+	 * potentially unaligned access, multiple destination-aligned
+	 * accesses, one final potentially unaligned access.
+	 */
+	__asm__ __volatile__ ( "ld.d %3, %1, 0\n\t"
+			       "ld.d %4, %1, 8\n\t"
+			       "addi.d %1, %1, 16\n\t"
+			       "st.d %3, %0, 0\n\t"
+			       "st.d %4, %0, 8\n\t"
+			       "addi.d %0, %0, 16\n\t"
+			       "andi %3, %0, 15\n\t"
+			       "sub.d %0, %0, %3\n\t"
+			       "sub.d %1, %1, %3\n\t"
+			       "addi.d $t0, $zero, 0xf\n\t"
+			       "andn %2, %5, $t0\n\t"
+			       "b 2f\n\t"
+			       "\n1:\n\t"
+			       "ld.d %3, %1, 0\n\t"
+			       "ld.d %4, %1, 8\n\t"
+			       "addi.d %1, %1, 16\n\t"
+			       "st.d %3, %0, 0\n\t"
+			       "st.d %4, %0, 8\n\t"
+			       "addi.d %0, %0, 16\n\t"
+			       "\n2:\n\t"
+			       "bne %0, %2, 1b\n\t"
+			       "ld.d %3, %6, -16\n\t"
+			       "ld.d %4, %6, -8\n\t"
+			       "st.d %3, %5, -16\n\t"
+			       "st.d %4, %5, -8\n\t"
+			       : "=&r" ( discard_dest ),
+				 "=&r" ( discard_src ),
+				 "=&r" ( discard_end ),
+				 "=&r" ( discard_low ),
+				 "=&r" ( discard_high )
+			       : "r" ( dest + len ), "r" ( src + len ),
+				 "0" ( dest ), "1" ( src )
+			       : "memory", "t0" );
+}
+
+/**
+ * Zero memory region
+ *
+ * @v dest		Destination region
+ * @v len		Length
+ */
+void loong64_bzero ( void *dest, size_t len ) {
+	size_t discard_offset;
+	void *discard_dest;
+	void *discard_end;
+
+	/* If length is too short, then just zero individual bytes.
+	 */
+	if ( len < 16 ) {
+		__asm__ __volatile__ ( "beqz %0, 2f\n\t"
+				       "\n1:\n\t"
+				       "addi.d %0, %0, -1\n\t"
+				       "stx.b $zero, %1, %0\n\t"
+				       "bnez %0, 1b\n\t"
+				       "\n2:\n\t"
+				       : "=&r" ( discard_offset )
+				       : "r" ( dest ), "0" ( len )
+				       : "memory" );
+		return;
+	}
+
+	/* Zero 16 bytes at a time: one initial
+	 * potentially unaligned access, multiple aligned accesses,
+	 * one final potentially unaligned access.
+	 */
+
+	__asm__ __volatile__ ( "st.d $zero, %0, 0\n\t"
+			       "st.d $zero, %0, 8\n\t"
+			       "addi.d %0, %0, 16\n\t"
+			       "addi.w $t0, $zero, 15\n\t"
+			       "andn %0, %0, $t0\n\t"
+			       "addi.w $t0, $zero, 15\n\t"
+			       "andn %1, %2, $t0\n\t"
+			       "b 2f\n\t"
+			       "\n1:\n\t"
+			       "st.d $zero, %0, 0\n\t"
+			       "st.d $zero, %0, 8\n\t"
+			       "addi.d %0, %0, 16\n\t"
+			       "\n2:\n\t"
+			       "bne %0, %1, 1b\n\t"
+			       "st.d $zero, %2, -16\n\t"
+			       "st.d $zero, %2, -8\n\t"
+			       : "=&r" ( discard_dest ),
+				 "=&r" ( discard_end )
+			       : "r" ( dest + len ), "0" ( dest )
+			       : "memory", "t0" );
+}
+
+/**
+ * Fill memory region
+ *
+ * @v dest		Destination region
+ * @v len		Length
+ * @v character		Fill character
+ *
+ * The unusual parameter order is to allow for more efficient
+ * tail-calling to loong64_bzero() when zeroing a region.
+ */
+void loong64_memset ( void *dest, size_t len, int character ) {
+	size_t discard_offset;
+
+	/* Use optimised zeroing code if applicable */
+	if ( character == 0 ) {
+		loong64_bzero ( dest, len );
+		return;
+	}
+
+	/* Fill one byte at a time.  Calling memset() with a non-zero
+	 * value is relatively rare and unlikely to be
+	 * performance-critical.
+	 */
+	__asm__ __volatile__ ( "beqz %0, 2f\n\t"
+			       "\n1:\n\t"
+			       "addi.d %0, %0, -1\n\t"
+			       "stx.b %2, %1, %0\n\t"
+			       "bnez %0, 1b\n\t"
+			       "\n2:\n\t"
+			       : "=&r" ( discard_offset )
+			       : "r" ( dest ), "r" ( character ), "0" ( len )
+			       : "memory" );
+}
+
+/**
+ * Copy (possibly overlapping) memory region forwards
+ *
+ * @v dest		Destination region
+ * @v src		Source region
+ * @v len		Length
+ */
+void loong64_memmove_forwards ( void *dest, const void *src, size_t len ) {
+	void *discard_dest;
+	const void *discard_src;
+	unsigned long discard_data;
+
+	/* Assume memmove() is not performance-critical, and perform a
+	 * bytewise copy for simplicity.
+	 */
+	__asm__ __volatile__ ( "b 2f\n\t"
+			       "\n1:\n\t"
+			       "ld.b %2, %1, 0\n\t"
+			       "addi.d %1, %1, 1\n\t"
+			       "st.b %2, %0, 0\n\t"
+			       "addi.d %0, %0, 1\n\t"
+			       "\n2:\n\t"
+			       "bne %0, %3, 1b\n\t"
+			       : "=&r" ( discard_dest ),
+				 "=&r" ( discard_src ),
+				 "=&r" ( discard_data )
+			       : "r" ( dest + len ), "0" ( dest ), "1" ( src )
+			       : "memory" );
+}
+
+/**
+ * Copy (possibly overlapping) memory region backwards
+ *
+ * @v dest		Destination region
+ * @v src		Source region
+ * @v len		Length
+ */
+void loong64_memmove_backwards ( void *dest, const void *src, size_t len ) {
+	size_t discard_offset;
+	unsigned long discard_data;
+
+	/* Assume memmove() is not performance-critical, and perform a
+	 * bytewise copy for simplicity.
+	 */
+	__asm__ __volatile__ ( "beqz %0, 2f\n\t"
+			       "\n1:\n\t"
+			       "addi.d %0, %0, -1\n\t"
+			       "ldx.b %1, %3, %0\n\t"
+			       "stx.b %1, %2, %0\n\t"
+			       "bnez %0, 1b\n\t"
+			       "\n2:\n\t"
+			       : "=&r" ( discard_offset ),
+				 "=&r" ( discard_data )
+			       : "r" ( dest ), "r" ( src ), "0" ( len )
+			       : "memory" );
+}
+
+/**
+ * Copy (possibly overlapping) memory region
+ *
+ * @v dest		Destination region
+ * @v src		Source region
+ * @v len		Length
+ */
+void loong64_memmove ( void *dest, const void *src, size_t len ) {
+
+	if ( dest <= src ) {
+		loong64_memmove_forwards ( dest, src, len );
+	} else {
+		loong64_memmove_backwards ( dest, src, len );
+	}
+}
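
For clarity, the copy strategy used above for lengths of at least 16
bytes can be summarised by the following portable sketch (illustrative
only; copy16() stands in for the paired ld.d/st.d accesses, and the
overlapping head and tail copies are intentional, exactly as in the
assembly):

#include <stddef.h>
#include <stdint.h>

static void copy16 ( uint8_t *dest, const uint8_t *src ) {
	unsigned int i;

	for ( i = 0 ; i < 16 ; i++ )
		dest[i] = src[i];
}

static void memcpy_sketch ( uint8_t *dest, const uint8_t *src,
			    size_t len ) {
	uint8_t * const tail_dest = ( dest + len - 16 );
	const uint8_t * const tail_src = ( src + len - 16 );
	uint8_t * const end =
		( ( uint8_t * ) ( ( ( uintptr_t ) ( dest + len ) ) & ~15UL ) );
	size_t skew;

	/* Head: copy the first 16 bytes, possibly unaligned */
	copy16 ( dest, src );
	dest += 16;
	src += 16;

	/* Round the destination down to a 16-byte boundary, moving
	 * the source back by the same amount (the source itself may
	 * therefore remain unaligned).
	 */
	skew = ( ( ( uintptr_t ) dest ) & 15 );
	dest -= skew;
	src -= skew;

	/* Body: destination-aligned 16-byte blocks */
	while ( dest != end ) {
		copy16 ( dest, src );
		dest += 16;
		src += 16;
	}

	/* Tail: copy the final 16 bytes, possibly unaligned, ending
	 * exactly at the end of the region.
	 */
	copy16 ( tail_dest, tail_src );
}
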
diff --git a/src/arch/loong64/core/setjmp.S b/src/arch/loong64/core/setjmp.S
new file mode 100644
index 0000000..36ad1dd
--- /dev/null
+++ b/src/arch/loong64/core/setjmp.S
@@ -0,0 +1,53 @@
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL )
+
+	.section ".note.GNU-stack", "", %progbits
+	.text
+/*
+   int setjmp(jmp_buf env);
+*/
+	.globl	setjmp
+	.type	setjmp, %function
+setjmp:
+	/* Store registers */
+	st.d	$s0, $a0, 0x0
+	st.d	$s1, $a0, 0x8
+	st.d	$s2, $a0, 0x10
+	st.d	$s3, $a0, 0x18
+	st.d	$s4, $a0, 0x20
+	st.d	$s5, $a0, 0x28
+	st.d	$s6, $a0, 0x30
+	st.d	$s7, $a0, 0x38
+	st.d	$s8, $a0, 0x40
+	st.d	$fp, $a0, 0x48
+	st.d	$sp, $a0, 0x50
+	st.d	$ra, $a0, 0x58
+
+	move	$a0, $zero
+	jirl	$zero, $ra, 0
+	.size	setjmp, . - setjmp
+
+/*
+   void longjmp(jmp_buf env, int val);
+*/
+	.globl	longjmp
+	.type	longjmp, %function
+longjmp:
+	/* Restore registers */
+	ld.d	$s0, $a0, 0x0
+	ld.d	$s1, $a0, 0x8
+	ld.d	$s2, $a0, 0x10
+	ld.d	$s3, $a0, 0x18
+	ld.d	$s4, $a0, 0x20
+	ld.d	$s5, $a0, 0x28
+	ld.d	$s6, $a0, 0x30
+	ld.d	$s7, $a0, 0x38
+	ld.d	$s8, $a0, 0x40
+	ld.d	$fp, $a0, 0x48
+	ld.d	$sp, $a0, 0x50
+	ld.d	$ra, $a0, 0x58
+	addi.d	$a0, $zero, 1    # a0 = 1
+	beqz	$a1, .exit       # if (a1 == 0) goto .exit
+	move	$a0, $a1         # a0 = a1
+.exit:
+	jirl	$zero, $ra, 0
+	.size	longjmp, . - longjmp
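
A minimal usage sketch of the semantics implemented above (the function
names are illustrative); note in particular that longjmp() with a zero
value makes setjmp() appear to return 1, which is what the a0/a1 check
before .exit provides:

#include <setjmp.h>

static jmp_buf env;

static void bail_out ( void ) {
	/* A zero value is promoted to 1 by the "addi.d $a0, $zero, 1"
	 * and "beqz $a1, .exit" sequence above.
	 */
	longjmp ( env, 0 );
}

static int run_with_escape ( void ) {

	if ( setjmp ( env ) ) {
		/* Reached only via longjmp(); setjmp() returned 1 */
		return -1;
	}
	bail_out();
	/* Never reached */
	return 0;
}
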
diff --git a/src/arch/loong64/include/bits/acpi.h b/src/arch/loong64/include/bits/acpi.h
new file mode 100644
index 0000000..83dd1df
--- /dev/null
+++ b/src/arch/loong64/include/bits/acpi.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_ACPI_H
+#define _BITS_ACPI_H
+
+/** @file
+ *
+ * LoongArch64-specific ACPI API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_ACPI_H */
diff --git a/src/arch/loong64/include/bits/bigint.h b/src/arch/loong64/include/bits/bigint.h
new file mode 100644
index 0000000..89e0b86
--- /dev/null
+++ b/src/arch/loong64/include/bits/bigint.h
@@ -0,0 +1,336 @@
+#ifndef _BITS_BIGINT_H
+#define _BITS_BIGINT_H
+
+/** @file
+ *
+ * Big integer support
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+
+/** Element of a big integer */
+typedef uint64_t bigint_element_t;
+
+/**
+ * Initialise big integer
+ *
+ * @v value0	Element 0 of big integer to initialise
+ * @v size		Number of elements
+ * @v data		Raw data
+ * @v len		Length of raw data
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_init_raw ( uint64_t *value0, unsigned int size,
+		  const void *data, size_t len ) {
+	size_t pad_len = ( sizeof ( bigint_t ( size ) ) - len );
+	uint8_t *value_byte = ( ( void * ) value0 );
+	const uint8_t *data_byte = ( data + len );
+
+	/* Copy raw data in reverse order, padding with zeros */
+	while ( len-- )
+		*(value_byte++) = *(--data_byte);
+	while ( pad_len-- )
+		*(value_byte++) = 0;
+}
+
+/**
+ * Add big integers
+ *
+ * @v addend0		Element 0 of big integer to add
+ * @v value0		Element 0 of big integer to be added to
+ * @v size		Number of elements
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_add_raw ( const uint64_t *addend0, uint64_t *value0,
+		 unsigned int size ) {
+	bigint_t ( size ) __attribute__ (( may_alias )) *value =
+		( ( void * ) value0 );
+	uint64_t *discard_addend;
+	uint64_t *discard_value;
+	uint64_t discard_addend_i;
+	uint64_t discard_value_i;
+	unsigned int discard_size;
+	__asm__ __volatile__ ( "move   $t0, $zero\n"
+			       "1:\n\t"
+			       "ld.d   %3, %0, 0\n\t"
+			       "addi.d %0, %0, 8\n\t"
+			       "ld.d   %4, %1, 0\n\t"
+
+			       "add.d  %4, %4, $t0\n\t"
+			       "sltu   $t0, %4, $t0\n\t"
+
+			       "add.d  %4, %4, %3\n\t"
+			       "sltu   $t1, %4, %3\n\t"
+
+			       "or     $t0, $t0, $t1\n\t"
+			       "st.d   %4,  %1, 0\n\t"
+			       "addi.d %1, %1, 8\n\t"
+			       "addi.w %2, %2, -1\n\t"
+			       "bnez   %2, 1b"
+			       : "=r" ( discard_addend ),
+				 "=r" ( discard_value ),
+				 "=r" ( discard_size ),
+				 "=r" ( discard_addend_i ),
+				 "=r" ( discard_value_i ),
+				 "+m" ( *value )
+			       : "0" ( addend0 ),
+				 "1" ( value0 ),
+				 "2" ( size )
+			       : "t0", "t1" );
+}
+
+/**
+ * Subtract big integers
+ *
+ * @v subtrahend0	Element 0 of big integer to subtract
+ * @v value0		Element 0 of big integer to be subtracted from
+ * @v size		Number of elements
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_subtract_raw ( const uint64_t *subtrahend0, uint64_t *value0,
+		      unsigned int size ) {
+	uint64_t *discard_subtrahend;
+	uint64_t *discard_value;
+	uint64_t discard_subtrahend_i;
+	uint64_t discard_value_i;
+	unsigned int discard_size;
+	unsigned int flag = 0;
+
+	discard_subtrahend = (uint64_t*) subtrahend0;
+	discard_value = value0;
+	discard_size = size;
+
+	do {
+		discard_subtrahend_i = *discard_subtrahend;
+		discard_subtrahend++;
+		discard_value_i = *discard_value;
+
+		*discard_value = discard_value_i - discard_subtrahend_i - flag;
+
+		/* Compute the borrow out of this element, taking care
+		 * not to overflow in ( subtrahend + borrow ) */
+		if ( ( discard_value_i < discard_subtrahend_i ) ||
+		     ( ( discard_value_i == discard_subtrahend_i ) && flag ) ) {
+			flag = 1;
+		} else {
+			flag = 0;
+		}
+
+		discard_value++;
+		discard_size -= 1;
+	} while ( discard_size != 0 );
+}
+
+/**
+ * Rotate big integer left
+ *
+ * @v value0		Element 0 of big integer
+ * @v size		Number of elements
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_rol_raw ( uint64_t *value0, unsigned int size ) {
+	uint64_t *discard_value;
+	uint64_t discard_value_i;
+	unsigned int discard_size;
+	uint64_t current_value_i;
+	unsigned int flag = 0;
+
+	discard_value = value0;
+	discard_size = size;
+	do {
+		discard_value_i = *discard_value;
+		current_value_i = discard_value_i;
+
+		/* Shift left by one and insert the carry in from the
+		 * previous element */
+		discard_value_i = ( ( discard_value_i << 1 ) | flag );
+
+		/* The carry out of this element is the original top
+		 * bit, even if the shifted value wraps to all-ones */
+		flag = ( current_value_i >> 63 );
+
+		*discard_value = discard_value_i;
+		discard_value++;
+		discard_size -= 1;
+	} while ( discard_size != 0 );
+}
+
+/**
+ * Rotate big integer right
+ *
+ * @v value0		Element 0 of big integer
+ * @v size		Number of elements
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_ror_raw ( uint64_t *value0, unsigned int size ) {
+	uint64_t *discard_value;
+	uint64_t discard_value_i;
+	uint64_t discard_value_j;
+	unsigned int discard_size;
+
+	discard_value = value0;
+	discard_size = size;
+
+	discard_value_j = 0;
+
+	do {
+		discard_size -= 1;
+
+		discard_value_i = *(discard_value + discard_size);
+
+		discard_value_j = (discard_value_j << 63) | (discard_value_i >> 1);
+
+		*(discard_value + discard_size) = discard_value_j;
+
+		discard_value_j = discard_value_i;
+	} while ( discard_size > 0 );
+}
+
+/**
+ * Test if big integer is equal to zero
+ *
+ * @v value0		Element 0 of big integer
+ * @v size		Number of elements
+ * @ret is_zero		Big integer is equal to zero
+ */
+static inline __attribute__ (( always_inline, pure )) int
+bigint_is_zero_raw ( const uint64_t *value0, unsigned int size ) {
+	const uint64_t *value = value0;
+	uint64_t value_i;
+
+	do {
+		value_i = *(value++);
+		if ( value_i )
+			break;
+	} while ( --size );
+
+	return ( value_i == 0 );
+}
+
+/**
+ * Compare big integers
+ *
+ * @v value0		Element 0 of big integer
+ * @v reference0	Element 0 of reference big integer
+ * @v size		Number of elements
+ * @ret geq		Big integer is greater than or equal to the reference
+ */
+static inline __attribute__ (( always_inline, pure )) int
+bigint_is_geq_raw ( const uint64_t *value0, const uint64_t *reference0,
+		    unsigned int size ) {
+	const uint64_t *value = ( value0 + size );
+	const uint64_t *reference = ( reference0 + size );
+	uint64_t value_i;
+	uint64_t reference_i;
+
+	do {
+		value_i = *(--value);
+		reference_i = *(--reference);
+		if ( value_i != reference_i )
+			break;
+	} while ( --size );
+
+	return ( value_i >= reference_i );
+}
+
+/**
+ * Test if bit is set in big integer
+ *
+ * @v value0		Element 0 of big integer
+ * @v size		Number of elements
+ * @v bit		Bit to test
+ * @ret is_set		Bit is set
+ */
+static inline __attribute__ (( always_inline )) int
+bigint_bit_is_set_raw ( const uint64_t *value0, unsigned int size,
+			unsigned int bit ) {
+	const bigint_t ( size ) __attribute__ (( may_alias )) *value =
+		( ( const void * ) value0 );
+	unsigned int index = ( bit / ( 8 * sizeof ( value->element[0] ) ) );
+	unsigned int subindex = ( bit % ( 8 * sizeof ( value->element[0] ) ) );
+
+	return ( !! ( value->element[index] & ( 1UL << subindex ) ) );
+}
+
+/**
+ * Find highest bit set in big integer
+ *
+ * @v value0		Element 0 of big integer
+ * @v size		Number of elements
+ * @ret max_bit		Highest bit set + 1 (or 0 if no bits set)
+ */
+static inline __attribute__ (( always_inline )) int
+bigint_max_set_bit_raw ( const uint64_t *value0, unsigned int size ) {
+	const uint64_t *value = ( value0 + size );
+	int max_bit = ( 8 * sizeof ( bigint_t ( size ) ) );
+	uint64_t value_i;
+
+	do {
+		value_i = *(--value);
+		max_bit -= ( 64 - fls ( value_i ) );
+		if ( value_i )
+			break;
+	} while ( --size );
+
+	return max_bit;
+}
+
+/**
+ * Grow big integer
+ *
+ * @v source0		Element 0 of source big integer
+ * @v source_size	Number of elements in source big integer
+ * @v dest0		Element 0 of destination big integer
+ * @v dest_size		Number of elements in destination big integer
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_grow_raw ( const uint64_t *source0, unsigned int source_size,
+		  uint64_t *dest0, unsigned int dest_size ) {
+	unsigned int pad_size = ( dest_size - source_size );
+
+	memcpy ( dest0, source0, sizeof ( bigint_t ( source_size ) ) );
+	memset ( ( dest0 + source_size ), 0, sizeof ( bigint_t ( pad_size ) ) );
+}
+
+/**
+ * Shrink big integer
+ *
+ * @v source0		Element 0 of source big integer
+ * @v source_size	Number of elements in source big integer
+ * @v dest0		Element 0 of destination big integer
+ * @v dest_size		Number of elements in destination big integer
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_shrink_raw ( const uint64_t *source0, unsigned int source_size __unused,
+		    uint64_t *dest0, unsigned int dest_size ) {
+
+	memcpy ( dest0, source0, sizeof ( bigint_t ( dest_size ) ) );
+}
+
+/**
+ * Finalise big integer
+ *
+ * @v value0		Element 0 of big integer to finalise
+ * @v size		Number of elements
+ * @v out		Output buffer
+ * @v len		Length of output buffer
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_done_raw ( const uint64_t *value0, unsigned int size __unused,
+		  void *out, size_t len ) {
+	const uint8_t *value_byte = ( ( const void * ) value0 );
+	uint8_t *out_byte = ( out + len );
+
+	/* Copy raw data in reverse order */
+	while ( len-- )
+		*(--out_byte) = *(value_byte++);
+}
+
+extern void bigint_multiply_raw ( const uint64_t *multiplicand0,
+				  const uint64_t *multiplier0,
+				  uint64_t *value0, unsigned int size );
+
+#endif /* _BITS_BIGINT_H */
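
As a worked example of the carry handling in bigint_add_raw() above (a
hand-written sketch, not part of the patch; the element values are
chosen so that the carry out of element 0 must propagate into
element 1):

#include <assert.h>
#include <stdint.h>
#include <ipxe/bigint.h>

static void add_carry_example ( void ) {
	/* Elements are stored least significant first, so this is
	 * the two-element value ( 2^64 - 1 ).
	 */
	uint64_t addend[2] = { 0xffffffffffffffffUL, 0 };
	uint64_t value[2] = { 1, 0 };

	/* ( 2^64 - 1 ) + 1 = 2^64: the sltu/add.d sequence must carry
	 * out of element 0 and into element 1.
	 */
	bigint_add_raw ( addend, value, 2 );
	assert ( value[0] == 0 );
	assert ( value[1] == 1 );
}
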
diff --git a/src/arch/loong64/include/bits/bitops.h b/src/arch/loong64/include/bits/bitops.h
new file mode 100644
index 0000000..6dea29c
--- /dev/null
+++ b/src/arch/loong64/include/bits/bitops.h
@@ -0,0 +1,102 @@
+#ifndef _BITS_BITOPS_H
+#define _BITS_BITOPS_H
+
+/** @file
+ *
+ * LoongArch64 bit operations
+ *
+ * We perform atomic bit set and bit clear operations using the
+ * "ll.d" and "sc.d" instructions, retrying until the
+ * store-conditional succeeds.  The "+m" output constraint on the
+ * 64-bit word containing the bit informs the compiler that this
+ * word may be modified, so that it does not reuse a stale cached
+ * value across the asm statement.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <stdint.h>
+
+/**
+ * Test and set bit atomically
+ *
+ * @v bit		Bit to set
+ * @v bits		Bit field
+ * @ret old		Old value of bit (zero or non-zero)
+ */
+static inline __attribute__ (( always_inline )) int
+test_and_set_bit ( unsigned int bit, volatile void *bits ) {
+	unsigned int index = ( bit / 64 );
+	unsigned int offset = ( bit % 64 );
+	volatile uint64_t *qword = ( ( ( volatile uint64_t * ) bits ) + index );
+	uint64_t mask = ( 1UL << offset );
+	uint64_t old;
+	uint64_t new;
+
+	__asm__ __volatile__ ( "1:				\n\t"
+			       "ll.d %[old], %[qword]		\n\t"
+			       "or   %[new], %[old], %[mask]	\n\t"
+			       "sc.d %[new], %[qword]		\n\t"
+			       "beqz %[new], 1b			\n\t"
+			       : [old] "=&r" ( old ),
+				 [new] "=&r" ( new ),
+				 [qword] "+m" ( *qword )
+			       : [mask] "r" ( mask )
+			       : "cc", "memory");
+	return ( !! ( old & mask ) );
+}
+
+/**
+ * Test and clear bit atomically
+ *
+ * @v bit		Bit to clear
+ * @v bits		Bit field
+ * @ret old		Old value of bit (zero or non-zero)
+ */
+static inline __attribute__ (( always_inline )) int
+test_and_clear_bit ( unsigned int bit, volatile void *bits ) {
+	unsigned int index = ( bit / 64 );
+	unsigned int offset = ( bit % 64 );
+	volatile uint64_t *qword = ( ( ( volatile uint64_t * ) bits ) + index );
+	uint64_t mask = ( 1UL << offset );
+	uint64_t old;
+	uint64_t new;
+
+	__asm__ __volatile__ ( "1:				\n\t"
+			       "ll.d %[old], %[qword]		\n\t"
+			       "andn %[new], %[old], %[mask]	\n\t"
+			       "sc.d %[new], %[qword]		\n\t"
+			       "beqz %[new], 1b			\n\t"
+			       : [old] "=&r" ( old ),
+				 [new] "=&r" ( new ),
+				 [qword] "+m" ( *qword )
+			       : [mask] "r" ( mask )
+			       : "cc", "memory");
+	return ( !! ( old & mask ) );
+}
+
+/**
+ * Set bit atomically
+ *
+ * @v bit		Bit to set
+ * @v bits		Bit field
+ */
+static inline __attribute__ (( always_inline )) void
+set_bit ( unsigned int bit, volatile void *bits ) {
+
+	test_and_set_bit ( bit, bits );
+}
+
+/**
+ * Clear bit atomically
+ *
+ * @v bit		Bit to clear
+ * @v bits		Bit field
+ */
+static inline __attribute__ (( always_inline )) void
+clear_bit ( unsigned int bit, volatile void *bits ) {
+
+	test_and_clear_bit ( bit, bits );
+}
+
+#endif /* _BITS_BITOPS_H */
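
A brief usage sketch for the atomic operations above (the flag field,
its size, and the use of the generic ipxe/bitops.h wrapper header are
illustrative):

#include <stdint.h>
#include <ipxe/bitops.h>

/* A 128-bit flag field, operated on one 64-bit word at a time by the
 * ll.d/sc.d sequences above.
 */
static uint64_t flags[2];

/* Return non-zero if this caller was the one to claim the flag */
static int claim_flag ( unsigned int flag ) {

	return ( test_and_set_bit ( flag, flags ) == 0 );
}
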
diff --git a/src/arch/loong64/include/bits/byteswap.h b/src/arch/loong64/include/bits/byteswap.h
new file mode 100644
index 0000000..1e22d09
--- /dev/null
+++ b/src/arch/loong64/include/bits/byteswap.h
@@ -0,0 +1,47 @@
+#ifndef _BITS_BYTESWAP_H
+#define _BITS_BYTESWAP_H
+
+/** @file
+ *
+ * Byte-order swapping functions
+ *
+ */
+
+#include <stdint.h>
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+static inline __attribute__ (( always_inline, const )) uint16_t
+__bswap_variable_16 ( uint16_t x ) {
+	__asm__ ( "revb.2h %0, %1" : "=r" ( x ) : "r" ( x ) );
+	return x;
+}
+
+static inline __attribute__ (( always_inline )) void
+__bswap_16s ( uint16_t *x ) {
+	*x = __bswap_variable_16 ( *x );
+}
+
+static inline __attribute__ (( always_inline, const )) uint32_t
+__bswap_variable_32 ( uint32_t x ) {
+	__asm__ ( "revb.2w %0, %1" : "=r" ( x ) : "r" ( x ) );
+	return x;
+}
+
+static inline __attribute__ (( always_inline )) void
+__bswap_32s ( uint32_t *x ) {
+	*x = __bswap_variable_32 ( *x );
+}
+
+static inline __attribute__ (( always_inline, const )) uint64_t
+__bswap_variable_64 ( uint64_t x ) {
+	__asm__ ( "revb.d %0, %1" : "=r" ( x ) : "r" ( x ) );
+	return x;
+}
+
+static inline __attribute__ (( always_inline )) void
+__bswap_64s ( uint64_t *x ) {
+	*x = __bswap_variable_64 ( *x );
+}
+
+#endif
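
As a quick illustrative check of the byte-reversal instructions used
above (included via the generic byteswap.h wrapper header):

#include <stdint.h>
#include <byteswap.h>

/* revb.2w reverses the four bytes of the 32-bit value */
static int bswap32_check ( void ) {

	return ( __bswap_variable_32 ( 0x11223344UL ) == 0x44332211UL );
}
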
diff --git a/src/arch/loong64/include/bits/compiler.h b/src/arch/loong64/include/bits/compiler.h
new file mode 100644
index 0000000..8bdaf63
--- /dev/null
+++ b/src/arch/loong64/include/bits/compiler.h
@@ -0,0 +1,19 @@
+#ifndef _BITS_COMPILER_H
+#define _BITS_COMPILER_H
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+/** Dummy relocation type */
+#define RELOC_TYPE_NONE R_LARCH_NONE
+
+#ifndef ASSEMBLY
+
+/** Unprefixed constant operand modifier */
+#define ASM_NO_PREFIX "a"
+
+#define __asmcall
+#define __libgcc
+
+#endif /* ASSEMBLY */
+
+#endif /*_BITS_COMPILER_H */
diff --git a/src/arch/loong64/include/bits/endian.h b/src/arch/loong64/include/bits/endian.h
new file mode 100644
index 0000000..85718cf
--- /dev/null
+++ b/src/arch/loong64/include/bits/endian.h
@@ -0,0 +1,8 @@
+#ifndef _BITS_ENDIAN_H
+#define _BITS_ENDIAN_H
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#define __BYTE_ORDER __LITTLE_ENDIAN
+
+#endif /* _BITS_ENDIAN_H */
diff --git a/src/arch/loong64/include/bits/entropy.h b/src/arch/loong64/include/bits/entropy.h
new file mode 100644
index 0000000..8d37269
--- /dev/null
+++ b/src/arch/loong64/include/bits/entropy.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_ENTROPY_H
+#define _BITS_ENTROPY_H
+
+/** @file
+ *
+ * LoongArch64-specific entropy API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_ENTROPY_H */
diff --git a/src/arch/loong64/include/bits/errfile.h b/src/arch/loong64/include/bits/errfile.h
new file mode 100644
index 0000000..5e2c31b
--- /dev/null
+++ b/src/arch/loong64/include/bits/errfile.h
@@ -0,0 +1,19 @@
+#ifndef _BITS_ERRFILE_H
+#define _BITS_ERRFILE_H
+
+/** @file
+ *
+ * LoongArch64-specific error file identifiers
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+/**
+ * @addtogroup errfile Error file identifiers
+ * @{
+ */
+
+/** @} */
+
+#endif /* _BITS_ERRFILE_H */
diff --git a/src/arch/loong64/include/bits/hyperv.h b/src/arch/loong64/include/bits/hyperv.h
new file mode 100644
index 0000000..f0e0c87
--- /dev/null
+++ b/src/arch/loong64/include/bits/hyperv.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_HYPERV_H
+#define _BITS_HYPERV_H
+
+/** @file
+ *
+ * Hyper-V interface
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_HYPERV_H */
diff --git a/src/arch/loong64/include/bits/io.h b/src/arch/loong64/include/bits/io.h
new file mode 100644
index 0000000..20ca6a7
--- /dev/null
+++ b/src/arch/loong64/include/bits/io.h
@@ -0,0 +1,15 @@
+#ifndef _BITS_IO_H
+#define _BITS_IO_H
+
+/** @file
+ *
+ * LoongArch64-specific I/O API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+/** Page shift */
+#define PAGE_SHIFT 12
+
+#endif /* _BITS_IO_H */
diff --git a/src/arch/loong64/include/bits/iomap.h b/src/arch/loong64/include/bits/iomap.h
new file mode 100644
index 0000000..041171d
--- /dev/null
+++ b/src/arch/loong64/include/bits/iomap.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_IOMAP_H
+#define _BITS_IOMAP_H
+
+/** @file
+ *
+ * LoongArch64-specific I/O mapping API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_IOMAP_H */
diff --git a/src/arch/loong64/include/bits/nap.h b/src/arch/loong64/include/bits/nap.h
new file mode 100644
index 0000000..91e255d
--- /dev/null
+++ b/src/arch/loong64/include/bits/nap.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_NAP_H
+#define _BITS_NAP_H
+
+/** @file
+ *
+ * LoongArch64-specific CPU sleeping API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_NAP_H */
diff --git a/src/arch/loong64/include/bits/pci_io.h b/src/arch/loong64/include/bits/pci_io.h
new file mode 100644
index 0000000..fdc5141
--- /dev/null
+++ b/src/arch/loong64/include/bits/pci_io.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_PCI_IO_H
+#define _BITS_PCI_IO_H
+
+/** @file
+ *
+ * LoongArch64-specific PCI I/O API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_PCI_IO_H */
diff --git a/src/arch/loong64/include/bits/profile.h b/src/arch/loong64/include/bits/profile.h
new file mode 100644
index 0000000..9f597ce
--- /dev/null
+++ b/src/arch/loong64/include/bits/profile.h
@@ -0,0 +1,28 @@
+#ifndef _BITS_PROFILE_H
+#define _BITS_PROFILE_H
+
+/** @file
+ *
+ * Profiling
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <stdint.h>
+
+/**
+ * Get profiling timestamp
+ *
+ * @ret timestamp	Timestamp
+ */
+static inline __attribute__ (( always_inline )) uint64_t
+profile_timestamp ( void ) {
+	uint64_t cycles;
+
+	/* Read cycle counter */
+	__asm__ __volatile__ ( "rdtime.d %0, $zero\n\t" : "=r" ( cycles ) );
+	return cycles;
+}
+
+#endif /* _BITS_PROFILE_H */
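
A sketch of typical use of the timestamp above to measure an elapsed
interval (the measured operation and the use of the generic
ipxe/profile.h wrapper header are illustrative):

#include <stdint.h>
#include <ipxe/profile.h>

/* Count timer ticks elapsed around an arbitrary operation */
static uint64_t measure_ticks ( void ( * operation ) ( void ) ) {
	uint64_t started = profile_timestamp();
	uint64_t stopped;

	operation();
	stopped = profile_timestamp();
	return ( stopped - started );
}
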
diff --git a/src/arch/loong64/include/bits/reboot.h b/src/arch/loong64/include/bits/reboot.h
new file mode 100644
index 0000000..96a1eb1
--- /dev/null
+++ b/src/arch/loong64/include/bits/reboot.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_REBOOT_H
+#define _BITS_REBOOT_H
+
+/** @file
+ *
+ * LoongArch64-specific reboot API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_REBOOT_H */
diff --git a/src/arch/loong64/include/bits/sanboot.h b/src/arch/loong64/include/bits/sanboot.h
new file mode 100644
index 0000000..f9205e2
--- /dev/null
+++ b/src/arch/loong64/include/bits/sanboot.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_SANBOOT_H
+#define _BITS_SANBOOT_H
+
+/** @file
+ *
+ * LoongArch64-specific sanboot API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_SANBOOT_H */
diff --git a/src/arch/loong64/include/bits/smbios.h b/src/arch/loong64/include/bits/smbios.h
new file mode 100644
index 0000000..6c87db4
--- /dev/null
+++ b/src/arch/loong64/include/bits/smbios.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_SMBIOS_H
+#define _BITS_SMBIOS_H
+
+/** @file
+ *
+ * LoongArch64-specific SMBIOS API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_SMBIOS_H */
diff --git a/src/arch/loong64/include/bits/stdint.h b/src/arch/loong64/include/bits/stdint.h
new file mode 100644
index 0000000..fe1f994
--- /dev/null
+++ b/src/arch/loong64/include/bits/stdint.h
@@ -0,0 +1,23 @@
+#ifndef _BITS_STDINT_H
+#define _BITS_STDINT_H
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+typedef __SIZE_TYPE__		size_t;
+typedef signed long		ssize_t;
+typedef signed long		off_t;
+
+typedef unsigned char		uint8_t;
+typedef unsigned short		uint16_t;
+typedef unsigned int		uint32_t;
+typedef unsigned long long	uint64_t;
+
+typedef signed char		int8_t;
+typedef signed short		int16_t;
+typedef signed int		int32_t;
+typedef signed long long	int64_t;
+
+typedef unsigned long		physaddr_t;
+typedef unsigned long		intptr_t;
+
+#endif /* _BITS_STDINT_H */
diff --git a/src/arch/loong64/include/bits/string.h b/src/arch/loong64/include/bits/string.h
new file mode 100644
index 0000000..f54e258
--- /dev/null
+++ b/src/arch/loong64/include/bits/string.h
@@ -0,0 +1,61 @@
+#ifndef _BITS_STRING_H
+#define _BITS_STRING_H
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+/** @file
+ *
+ * String functions
+ *
+ */
+
+extern void loong64_bzero ( void *dest, size_t len );
+extern void loong64_memset ( void *dest, size_t len, int character );
+extern void loong64_memcpy ( void *dest, const void *src, size_t len );
+extern void loong64_memmove_forwards ( void *dest, const void *src, size_t len );
+extern void loong64_memmove_backwards ( void *dest, const void *src, size_t len );
+extern void loong64_memmove ( void *dest, const void *src, size_t len );
+
+/**
+ * Fill memory region
+ *
+ * @v dest		Destination region
+ * @v character		Fill character
+ * @v len		Length
+ * @ret dest		Destination region
+ */
+static inline __attribute__ (( always_inline )) void *
+memset ( void *dest, int character, size_t len ) {
+	loong64_memset ( dest, len, character );
+	return dest;
+}
+
+/**
+ * Copy memory region
+ *
+ * @v dest		Destination region
+ * @v src		Source region
+ * @v len		Length
+ * @ret dest		Destination region
+ */
+static inline __attribute__ (( always_inline )) void *
+memcpy ( void *dest, const void *src, size_t len ) {
+	loong64_memcpy ( dest, src, len );
+	return dest;
+}
+
+/**
+ * Copy (possibly overlapping) memory region
+ *
+ * @v dest		Destination region
+ * @v src		Source region
+ * @v len		Length
+ * @ret dest		Destination region
+ */
+static inline __attribute__ (( always_inline )) void *
+memmove ( void *dest, const void *src, size_t len ) {
+	loong64_memmove ( dest, src, len );
+	return dest;
+}
+
+#endif /* _BITS_STRING_H */
diff --git a/src/arch/loong64/include/bits/strings.h b/src/arch/loong64/include/bits/strings.h
new file mode 100644
index 0000000..e51977b
--- /dev/null
+++ b/src/arch/loong64/include/bits/strings.h
@@ -0,0 +1,69 @@
+#ifndef _BITS_STRINGS_H
+#define _BITS_STRINGS_H
+
+/** @file
+ *
+ * String functions
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+/**
+ * Find first (i.e. least significant) set bit
+ *
+ * @v value		Value
+ * @ret lsb		Least significant bit set in value (LSB=1), or zero
+ */
+static inline __attribute__ (( always_inline )) int __ffsll ( long long value ){
+	unsigned long long bits = value;
+	unsigned long long lsb;
+	unsigned int lz;
+
+	/* Extract least significant set bit */
+	lsb = ( bits & -bits );
+
+	/* Count number of leading zeroes before LSB */
+	__asm__ ( "clz.d %0, %1" : "=r" ( lz ) : "r" ( lsb ) );
+
+	return ( 64 - lz );
+}
+
+/**
+ * Find first (i.e. least significant) set bit
+ *
+ * @v value		Value
+ * @ret lsb		Least significant bit set in value (LSB=1), or zero
+ */
+static inline __attribute__ (( always_inline )) int __ffsl ( long value ) {
+
+	return __ffsll ( value );
+}
+
+/**
+ * Find last (i.e. most significant) set bit
+ *
+ * @v value		Value
+ * @ret msb		Most significant bit set in value (LSB=1), or zero
+ */
+static inline __attribute__ (( always_inline )) int __flsll ( long long value ){
+	unsigned int lz;
+
+	/* Count number of leading zeroes */
+	__asm__ ( "clz.d %0, %1" : "=r" ( lz ) : "r" ( value ) );
+
+	return ( 64 - lz );
+}
+
+/**
+ * Find last (i.e. most significant) set bit
+ *
+ * @v value		Value
+ * @ret msb		Most significant bit set in value (LSB=1), or zero
+ */
+static inline __attribute__ (( always_inline )) int __flsl ( long value ) {
+
+	return __flsll ( value );
+}
+
+#endif /* _BITS_STRINGS_H */
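
Worked examples of the bit numbering convention above (LSB is bit 1,
and a zero argument yields zero, since clz.d of zero gives 64):

#include <strings.h>

static int ffs_fls_check ( void ) {

	/* 0x10 has only bit 5 set, so it is both the lowest and the
	 * highest set bit; zero has no set bits at all.
	 */
	return ( ( __ffsll ( 0x10 ) == 5 ) && ( __flsll ( 0x10 ) == 5 ) &&
		 ( __ffsll ( 0 ) == 0 ) && ( __flsll ( 0 ) == 0 ) );
}
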
diff --git a/src/arch/loong64/include/bits/tcpip.h b/src/arch/loong64/include/bits/tcpip.h
new file mode 100644
index 0000000..fc3c5b3
--- /dev/null
+++ b/src/arch/loong64/include/bits/tcpip.h
@@ -0,0 +1,19 @@
+#ifndef _BITS_TCPIP_H
+#define _BITS_TCPIP_H
+
+/** @file
+ *
+ * Transport-network layer interface
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+static inline __attribute__ (( always_inline )) uint16_t
+tcpip_continue_chksum ( uint16_t partial, const void *data, size_t len ) {
+
+	/* Not yet optimised */
+	return generic_tcpip_continue_chksum ( partial, data, len );
+}
+
+#endif /* _BITS_TCPIP_H */
diff --git a/src/arch/loong64/include/bits/time.h b/src/arch/loong64/include/bits/time.h
new file mode 100644
index 0000000..4cd7485
--- /dev/null
+++ b/src/arch/loong64/include/bits/time.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_TIME_H
+#define _BITS_TIME_H
+
+/** @file
+ *
+ * LoongArch64-specific time API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_TIME_H */
diff --git a/src/arch/loong64/include/bits/uaccess.h b/src/arch/loong64/include/bits/uaccess.h
new file mode 100644
index 0000000..dddd9be
--- /dev/null
+++ b/src/arch/loong64/include/bits/uaccess.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_UACCESS_H
+#define _BITS_UACCESS_H
+
+/** @file
+ *
+ * LoongArch64-specific user access API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_UACCESS_H */
diff --git a/src/arch/loong64/include/bits/uart.h b/src/arch/loong64/include/bits/uart.h
new file mode 100644
index 0000000..6f85975
--- /dev/null
+++ b/src/arch/loong64/include/bits/uart.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_UART_H
+#define _BITS_UART_H
+
+/** @file
+ *
+ * 16550-compatible UART
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_UART_H */
diff --git a/src/arch/loong64/include/bits/umalloc.h b/src/arch/loong64/include/bits/umalloc.h
new file mode 100644
index 0000000..f6978b8
--- /dev/null
+++ b/src/arch/loong64/include/bits/umalloc.h
@@ -0,0 +1,12 @@
+#ifndef _BITS_UMALLOC_H
+#define _BITS_UMALLOC_H
+
+/** @file
+ *
+ * LoongArch64-specific user memory allocation API implementations
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#endif /* _BITS_UMALLOC_H */
diff --git a/src/arch/loong64/include/bits/xen.h b/src/arch/loong64/include/bits/xen.h
new file mode 100644
index 0000000..2a3d774
--- /dev/null
+++ b/src/arch/loong64/include/bits/xen.h
@@ -0,0 +1,13 @@
+#ifndef _BITS_XEN_H
+#define _BITS_XEN_H
+
+/** @file
+ *
+ * Xen interface
+ *
+ */
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <ipxe/nonxen.h>
+
+#endif /* _BITS_XEN_H */
diff --git a/src/arch/loong64/include/gdbmach.h b/src/arch/loong64/include/gdbmach.h
new file mode 100644
index 0000000..cd152ee
--- /dev/null
+++ b/src/arch/loong64/include/gdbmach.h
@@ -0,0 +1,45 @@
+#ifndef GDBMACH_H
+#define GDBMACH_H
+
+/** @file
+ *
+ * GDB architecture specifics
+ *
+ * This file declares functions for manipulating the machine state and
+ * debugging context.
+ *
+ */
+
+#include <stdint.h>
+
+typedef unsigned long gdbreg_t;
+
+/* Register snapshot */
+enum {
+	/* Not yet implemented */
+	GDBMACH_NREGS,
+};
+
+#define GDBMACH_SIZEOF_REGS ( GDBMACH_NREGS * sizeof ( gdbreg_t ) )
+
+static inline void gdbmach_set_pc ( gdbreg_t *regs, gdbreg_t pc ) {
+	/* Not yet implemented */
+	( void ) regs;
+	( void ) pc;
+}
+
+static inline void gdbmach_set_single_step ( gdbreg_t *regs, int step ) {
+	/* Not yet implemented */
+	( void ) regs;
+	( void ) step;
+}
+
+static inline void gdbmach_breakpoint ( void ) {
+	/* Not yet implemented */
+}
+
+extern int gdbmach_set_breakpoint ( int type, unsigned long addr, size_t len,
+				    int enable );
+extern void gdbmach_init ( void );
+
+#endif /* GDBMACH_H */
diff --git a/src/arch/loong64/include/ipxe/efi/dhcparch.h b/src/arch/loong64/include/ipxe/efi/dhcparch.h
new file mode 100644
index 0000000..5b6dae4
--- /dev/null
+++ b/src/arch/loong64/include/ipxe/efi/dhcparch.h
@@ -0,0 +1,20 @@
+#ifndef _IPXE_EFI_DHCPARCH_H
+#define _IPXE_EFI_DHCPARCH_H
+
+/** @file
+ *
+ * DHCP client architecture definitions
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <ipxe/dhcp.h>
+
+/** DHCP client architecture */
+#define DHCP_ARCH_CLIENT_ARCHITECTURE DHCP_CLIENT_ARCHITECTURE_LOONG64
+
+/** DHCP client network device interface */
+#define DHCP_ARCH_CLIENT_NDI 1 /* UNDI */ , 3, 10 /* v3.10 */
+
+#endif /* _IPXE_EFI_DHCPARCH_H */
diff --git a/src/arch/loong64/include/limits.h b/src/arch/loong64/include/limits.h
new file mode 100644
index 0000000..d51f94d
--- /dev/null
+++ b/src/arch/loong64/include/limits.h
@@ -0,0 +1,46 @@
+#ifndef LIMITS_H
+#define LIMITS_H	1
+
+/* Number of bits in a `char' */
+#define CHAR_BIT	8
+
+/* Minimum and maximum values a `signed char' can hold */
+#define SCHAR_MIN	(-128)
+#define SCHAR_MAX	127
+
+/* Maximum value an `unsigned char' can hold. (Minimum is 0.) */
+#define UCHAR_MAX	255
+
+/* Minimum and maximum values a `char' can hold */
+#define CHAR_MIN	SCHAR_MIN
+#define CHAR_MAX	SCHAR_MAX
+
+/* Minimum and maximum values a `signed short int' can hold */
+#define SHRT_MIN	(-32768)
+#define SHRT_MAX	32767
+
+/* Maximum value an `unsigned short' can hold. (Minimum is 0.) */
+#define USHRT_MAX	65535
+
+/* Minimum and maximum values a `signed int' can hold */
+#define INT_MIN		(-INT_MAX - 1)
+#define INT_MAX		2147483647
+
+/* Maximum value an `unsigned int' can hold. (Minimum is 0.) */
+#define UINT_MAX	4294967295U
+
+/* Minimum and maximum values a `signed long' can hold */
+#define LONG_MAX	9223372036854775807L
+#define LONG_MIN	(-LONG_MAX - 1L)
+
+/* Maximum value an `unsigned long' can hold. (Minimum is 0.) */
+#define ULONG_MAX	18446744073709551615UL
+
+/* Minimum and maximum values a `signed long long' can hold */
+#define LLONG_MAX	9223372036854775807LL
+#define LLONG_MIN	(-LLONG_MAX - 1LL)
+
+/* Maximum value an `unsigned long long' can hold. (Minimum is 0.) */
+#define ULLONG_MAX	18446744073709551615ULL
+
+#endif /* LIMITS_H */
diff --git a/src/arch/loong64/include/setjmp.h b/src/arch/loong64/include/setjmp.h
new file mode 100644
index 0000000..1e51683
--- /dev/null
+++ b/src/arch/loong64/include/setjmp.h
@@ -0,0 +1,31 @@
+#ifndef _SETJMP_H
+#define _SETJMP_H
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <stdint.h>
+
+/** A jump buffer */
+typedef struct {
+	uint64_t s0;
+	uint64_t s1;
+	uint64_t s2;
+	uint64_t s3;
+	uint64_t s4;
+	uint64_t s5;
+	uint64_t s6;
+	uint64_t s7;
+	uint64_t s8;
+
+	uint64_t fp;
+	uint64_t sp;
+	uint64_t ra;
+} jmp_buf[1];
+
+extern int __asmcall __attribute__ (( returns_twice ))
+setjmp ( jmp_buf env );
+
+extern void __asmcall __attribute__ (( noreturn ))
+longjmp ( jmp_buf env, int val );
+
+#endif /* _SETJMP_H */