diff --git a/arch/x86/cpu/cpu.c b/arch/x86/cpu/cpu.c
index 8102fb9519810b0b2a753f3b7c4c75f1fb39dfe6..8c3b92c78dcbe92331948b9752c6705389eb0f50 100644
--- a/arch/x86/cpu/cpu.c
+++ b/arch/x86/cpu/cpu.c
@@ -90,6 +90,37 @@ static void load_gdt(const u64 *boot_gdt, u16 num_entries)
 	asm volatile("lgdtl %0\n" : : "m" (gdt));
 }
 
+void init_gd(gd_t *id, u64 *gdt_addr)	/* make id the active global data */
+{
+	id->gd_addr = (ulong)id;	/* self-pointer (first member of gd_t) */
+	setup_gdt(id, gdt_addr);	/* builds the GDT with FS based at id */
+}
+/* Fill the GDT at gdt_addr, load_gdt() it, then load DS/ES/GS/SS and FS */
+void setup_gdt(gd_t *id, u64 *gdt_addr)
+{
+	/* CS: code, read/execute, 4 GB, base 0 */
+	gdt_addr[X86_GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff);
+
+	/* DS: data, read/write, 4 GB, base 0 */
+	gdt_addr[X86_GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff);
+
+	/* FS: data, read/write, 4 GB, base id (so %fs:0 is id->gd_addr) */
+	gdt_addr[X86_GDT_ENTRY_32BIT_FS] = GDT_ENTRY(0xc093, (ulong)id, 0xfffff);
+
+	/* 16-bit CS: code, read/execute, 64 kB, base 0 */
+	gdt_addr[X86_GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff);
+
+	/* 16-bit DS: data, read/write, 64 kB, base 0 */
+	gdt_addr[X86_GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff);
+	/* NOTE(review): X86_GDT_ENTRY_UNUSED slot is never written here */
+	load_gdt(gdt_addr, X86_GDT_NUM_ENTRIES);
+	load_ds(X86_GDT_ENTRY_32BIT_DS);
+	load_es(X86_GDT_ENTRY_32BIT_DS);
+	load_gs(X86_GDT_ENTRY_32BIT_DS);
+	load_ss(X86_GDT_ENTRY_32BIT_DS);
+	load_fs(X86_GDT_ENTRY_32BIT_FS);	/* the entry based at id */
+}
+
 int x86_cpu_init_f(void)
 {
 	const u32 em_rst = ~X86_CR0_EM;
@@ -117,28 +148,6 @@ int x86_cpu_init_r(void)
 	    "movl	%%eax, %%cr0\n"
 	    "wbinvd\n" : : "i" (nw_cd_rst) : "eax");
 
-	/*
-	 * There are machines which are known to not boot with the GDT
-	 * being 8-byte unaligned. Intel recommends 16 byte alignment
-	 */
-	static const u64 boot_gdt[] __aligned(16) = {
-		/* CS: code, read/execute, 4 GB, base 0 */
-		[X86_GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
-		/* DS: data, read/write, 4 GB, base 0 */
-		[X86_GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
-		/* 16-bit CS: code, read/execute, 64 kB, base 0 */
-		[X86_GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff),
-		/* 16-bit DS: data, read/write, 64 kB, base 0 */
-		[X86_GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff),
-	};
-
-	load_gdt(boot_gdt, X86_GDT_NUM_ENTRIES);
-	load_ds(X86_GDT_ENTRY_32BIT_DS);
-	load_es(X86_GDT_ENTRY_32BIT_DS);
-	load_fs(X86_GDT_ENTRY_32BIT_DS);
-	load_gs(X86_GDT_ENTRY_32BIT_DS);
-	load_ss(X86_GDT_ENTRY_32BIT_DS);
-
 	/* Initialize core interrupt and exception functionality of CPU */
 	cpu_init_interrupts();
 	return 0;
diff --git a/arch/x86/cpu/start.S b/arch/x86/cpu/start.S
index 69a9b2cddcf7def542992bda5f6426fc396c8da8..ee0dabe4bcd049becb3c632ec408c98f32d13fb3 100644
--- a/arch/x86/cpu/start.S
+++ b/arch/x86/cpu/start.S
@@ -31,7 +31,7 @@
 #include <asm/global_data.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
-#include <generated/asm-offsets.h>
+#include <generated/generic-asm-offsets.h>
 
 .section .text
 .code32
@@ -85,6 +85,12 @@ car_init_ret:
 	 */
 	movl	$CONFIG_SYS_INIT_SP_ADDR, %esp
 
+	/* Initialise the Global Data Pointer: %eax = GD, %edx = GD + GD size */
+	movl	$CONFIG_SYS_INIT_GD_ADDR, %eax
+	movl	%eax, %edx
+	addl	$GENERATED_GBL_DATA_SIZE, %edx
+	call	init_gd;	/* assumes regparm-style args - confirm */
+
 	/* Set parameter to board_init_f() to boot flags */
 	xorl	%eax, %eax
 	movw	%bx, %ax
diff --git a/arch/x86/include/asm/global_data.h b/arch/x86/include/asm/global_data.h
index 05a2139d0017fc1096af97e3e9d4747aad0d8887..908a02c900c9a6084cde56c60c787897dd7c5192 100644
--- a/arch/x86/include/asm/global_data.h
+++ b/arch/x86/include/asm/global_data.h
@@ -36,6 +36,8 @@
 #ifndef __ASSEMBLY__
 
 typedef	struct global_data {
+	/* NOTE: gd_addr MUST be first member of struct global_data! */
+	unsigned long	gd_addr;	/* Location of Global Data */
 	bd_t		*bd;
 	unsigned long	flags;
 	unsigned long	baudrate;
@@ -51,13 +53,24 @@ typedef	struct global_data {
 	unsigned long	bus_clk;
 	unsigned long	relocaddr;	/* Start address of U-Boot in RAM */
 	unsigned long	start_addr_sp;	/* start_addr_stackpointer */
+	unsigned long	gdt_addr;	/* Location of GDT */
+	unsigned long	new_gd_addr;	/* New location of Global Data */
 	phys_size_t	ram_size;	/* RAM size */
 	unsigned long	reset_status;	/* reset status register at boot */
 	void		**jt;		/* jump table */
 	char		env_buf[32];	/* buffer for getenv() before reloc. */
 } gd_t;
 
-extern gd_t *gd;
+static inline gd_t *get_fs_gd_ptr(void)	/* returns the word at %fs:0 */
+{
+	gd_t *gd_ptr;
+	/* gd_addr is the first member of gd_t, so it is the word at offset 0 */
+	asm volatile("fs movl 0, %0\n" : "=r" (gd_ptr));
+
+	return gd_ptr;
+}
+
+#define gd	get_fs_gd_ptr()	/* each use of gd re-reads %fs:0 */
 
 #endif
 
@@ -73,12 +86,6 @@ extern gd_t *gd;
 #define GD_FLG_DISABLE_CONSOLE	0x00040	/* Disable console (in & out)		*/
 #define GD_FLG_ENV_READY	0x00080	/* Environment imported into hash table	*/
 
-#if 0
 #define DECLARE_GLOBAL_DATA_PTR
-#else
-#define XTRN_DECLARE_GLOBAL_DATA_PTR    extern
-#define DECLARE_GLOBAL_DATA_PTR     XTRN_DECLARE_GLOBAL_DATA_PTR \
-gd_t *gd
-#endif
 
 #endif /* __ASM_GBL_DATA_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index aa8188e51a2f064827fac021ba62915276a34391..6eb518063bd8ec031706f778c83710004952cfb8 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -33,6 +33,7 @@ enum {
 	X86_GDT_ENTRY_UNUSED,
 	X86_GDT_ENTRY_32BIT_CS,
 	X86_GDT_ENTRY_32BIT_DS,
+	X86_GDT_ENTRY_32BIT_FS,
 	X86_GDT_ENTRY_16BIT_CS,
 	X86_GDT_ENTRY_16BIT_DS,
 	X86_GDT_NUM_ENTRIES
diff --git a/arch/x86/include/asm/u-boot-x86.h b/arch/x86/include/asm/u-boot-x86.h
index c3d2277834e4b7b3c9cea1131e4f90f793a3d1bd..5540d517692539624a545c659b7f2385d2599f23 100644
--- a/arch/x86/include/asm/u-boot-x86.h
+++ b/arch/x86/include/asm/u-boot-x86.h
@@ -37,6 +37,8 @@ int x86_cpu_init_r(void);
 int cpu_init_r(void);
 int x86_cpu_init_f(void);
 int cpu_init_f(void);
+void init_gd(gd_t *id, u64 *gdt_addr);	/* sets id->gd_addr, calls setup_gdt */
+void setup_gdt(gd_t *id, u64 *gdt_addr);	/* fills + loads GDT at gdt_addr */
 
 /* cpu/.../timer.c */
 void timer_isr(void *);
diff --git a/arch/x86/lib/board.c b/arch/x86/lib/board.c
index 6f075b7aa003aae8863779cffbf293bf457261f0..b64c2d3ca385ba0db8c27ff72b517578bb2b8ddf 100644
--- a/arch/x86/lib/board.c
+++ b/arch/x86/lib/board.c
@@ -42,20 +42,12 @@
 #include <serial.h>
 #include <asm/u-boot-x86.h>
 #include <elf.h>
+#include <asm/processor.h>
 
 #ifdef CONFIG_BITBANGMII
 #include <miiphy.h>
 #endif
 
-/*
- * Pointer to initial global data area
- *
- * Here we initialize it.
- */
-#undef	XTRN_DECLARE_GLOBAL_DATA_PTR
-#define XTRN_DECLARE_GLOBAL_DATA_PTR	/* empty = allocate here */
-DECLARE_GLOBAL_DATA_PTR = (gd_t *) (CONFIG_SYS_INIT_GD_ADDR);
-
 /************************************************************************
  * Init Utilities							*
  ************************************************************************
@@ -128,6 +120,7 @@ static int calculate_relocation_address(void);
 static int copy_uboot_to_ram(void);
 static int clear_bss(void);
 static int do_elf_reloc_fixups(void);
+static int copy_gd_to_ram(void);
 
 init_fnc_t *init_sequence_f[] = {
 	cpu_init_f,
@@ -146,6 +139,7 @@ init_fnc_t *init_sequence_f[] = {
 };
 
 init_fnc_t *init_sequence_r[] = {
+	copy_gd_to_ram,
 	cpu_init_r,		/* basic cpu dependent setup */
 	board_early_init_r,	/* basic board dependent setup */
 	dram_init,		/* configure available RAM banks */
@@ -157,8 +151,6 @@ init_fnc_t *init_sequence_r[] = {
 	NULL,
 };
 
-gd_t *gd;
-
 static int calculate_relocation_address(void)
 {
 	ulong text_start = (ulong)&__text_start;
@@ -171,8 +163,18 @@ static int calculate_relocation_address(void)
 	 *       requirements
 	 */
 
-	/* Stack is at top of available memory */
+	/* Global Data is at top of available memory */
 	dest_addr = gd->ram_size;
+	dest_addr -= GENERATED_GBL_DATA_SIZE;
+	dest_addr &= ~15;
+	gd->new_gd_addr = dest_addr;
+
+	/* GDT is below Global Data */
+	dest_addr -= X86_GDT_SIZE;
+	dest_addr &= ~15;
+	gd->gdt_addr = dest_addr;
+
+	/* Stack is below GDT */
 	gd->start_addr_sp = dest_addr;
 
 	/* U-Boot is below the stack */
@@ -279,6 +281,31 @@ void board_init_f_r(void)
 		;
 }
 
+static int copy_gd_to_ram(void)
+{
+	gd_t *ram_gd;
+
+	/*
+	 * Global data is still in temporary memory (the CPU cache).
+	 * calculate_relocation_address() has stored its RAM location
+	 * in gd->new_gd_addr. Because gd is read through the FS
+	 * segment (the 'F-Segment Hack' used on x86), the contents are
+	 * copied first and FS is re-based onto the copy afterwards.
+	 */
+	ram_gd = (gd_t *)gd->new_gd_addr;
+	memcpy((void *)ram_gd, gd, sizeof(gd_t));
+
+	/*
+	 * Rebuild and reload the Global Descriptor Table at gd->gdt_addr
+	 * (also calculated by calculate_relocation_address()) so that FS,
+	 * and therefore gd, refers to the in-RAM copy of Global Data.
+	 */
+	ram_gd->gd_addr = (ulong)ram_gd;	/* init_gd() sets this too */
+	init_gd(ram_gd, (u64 *)gd->gdt_addr);	/* gd now in RAM */
+
+	return 0;
+}
+
 void board_init_r(gd_t *id, ulong dest_addr)
 {
 #if defined(CONFIG_CMD_NET)
@@ -288,15 +315,10 @@ void board_init_r(gd_t *id, ulong dest_addr)
 	ulong size;
 #endif
 	static bd_t bd_data;
-	static gd_t gd_data;
 	init_fnc_t **init_fnc_ptr;
 
 	show_boot_progress(0x21);
 
-	/* Global data pointer is now writable */
-	gd = &gd_data;
-	memcpy(gd, id, sizeof(gd_t));
-
 	/* compiler optimization barrier needed for GCC >= 3.4 */
 	__asm__ __volatile__("" : : : "memory");