Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: [OMPI devel] [Patch] Add support for ARMv7-A architecture
From: Leif Lindholm (leif.lindholm_at_[hidden])
Date: 2010-12-24 09:22:56


Hi,

The following patch adds support for the ARMv7-A architecture to opal.
This includes current processors such as Cortex-A8 and Cortex-A9, as
well as upcoming Cortex-A5 and Cortex-A15.

It has been validated on Ubuntu Lucid (10.04) and Maverick (10.10),
although the former might require some package updates to build from
checkout.

The opal/include/opal/sys/arm directory was cloned from powerpc.

I apologise for what I had to do to generate-asm.pl to get it to build.

Signed-off-by: leif.lindholm_at_[hidden]

Index: ompi-trunk/opal/asm/generate-asm.pl
===================================================================
--- ompi-trunk/opal/asm/generate-asm.pl (revision 24191)
+++ ompi-trunk/opal/asm/generate-asm.pl (working copy)
@@ -103,7 +103,11 @@
 }
 
 if ($GNU_STACK == 1) {
- print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n";
+ if ($asmarch eq "ARM") {
+ print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\%progbits\n";
+ } else {
+ print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n";
+ }
 }
 
 close(INPUT);
Index: ompi-trunk/opal/asm/asm-data.txt
===================================================================
--- ompi-trunk/opal/asm/asm-data.txt (revision 24191)
+++ ompi-trunk/opal/asm/asm-data.txt (working copy)
@@ -48,6 +48,15 @@
 
 ######################################################################
 #
+# ARM (ARMv7 and later)
+#
+######################################################################
+
+ARM default-.text-.globl-:--.L-#-1-1-1-1-1 arm-linux
+
+
+######################################################################
+#
 # Intel Pentium Class
 #
 ######################################################################
Index: ompi-trunk/opal/asm/base/ARM.asm
===================================================================
--- ompi-trunk/opal/asm/base/ARM.asm (revision 0)
+++ ompi-trunk/opal/asm/base/ARM.asm (revision 0)
@@ -0,0 +1,150 @@
+START_FILE
+ TEXT
+
+ ALIGN(4)
+START_FUNC(opal_atomic_mb)
+ dmb                     /* full memory barrier: one data memory barrier instruction */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_mb)
+
+
+START_FUNC(opal_atomic_rmb)
+ dmb                     /* read barrier: implemented with the same dmb as opal_atomic_mb */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_rmb)
+
+
+START_FUNC(opal_atomic_wmb)
+ dmb                     /* write barrier: implemented with the same dmb as opal_atomic_mb */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_wmb)
+
+
+START_FUNC(opal_atomic_cmpset_32)
+ LSYM(1)
+ ldrex r3, [r0]          /* 32-bit compare-and-set on the word at r0: r1 = expected value, r2 = replacement, result in r0. r3 = exclusive load of the word */
+ cmp r1, r3              /* mismatch leaves NE set and the branch below skips the store */
+ bne REFLSYM(2)
+ strex r12, r2, [r0]     /* try to store r2; r12 receives 0 on success, nonzero if the reservation was lost */
+ cmp r12, #0             /* nonzero status: the branch below retries from label 1 */
+ bne REFLSYM(1)
+ mov r0, #1              /* stored: result is 1; mov leaves the flags at EQ so the movne below does nothing */
+ LSYM(2)
+ movne r0, #0            /* NE here only when arriving from the mismatch branch: result is 0 */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_cmpset_32)
+
+
+START_FUNC(opal_atomic_cmpset_acq_32)
+ LSYM(3)
+ ldrex r3, [r0]          /* compare-and-set with a trailing barrier: r0 = address, r1 = expected value, r2 = replacement. r3 = exclusive load of the word */
+ cmp r1, r3              /* mismatch leaves NE set and the branch below skips the store and the barrier */
+ bne REFLSYM(4)
+ strex r12, r2, [r0]     /* try to store r2; r12 receives 0 on success */
+ cmp r12, #0             /* nonzero status: the branch below retries from label 3 */
+ bne REFLSYM(3)
+ dmb                     /* barrier after the successful store; dmb does not change the flags */
+ mov r0, #1              /* stored: result is 1; flags are still EQ so the movne below does nothing */
+ LSYM(4)
+ movne r0, #0            /* NE here only when arriving from the mismatch branch: result is 0 */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_cmpset_acq_32)
+
+
+START_FUNC(opal_atomic_cmpset_rel_32)
+ LSYM(5)
+ ldrex r3, [r0]          /* compare-and-set with a leading barrier: r0 = address, r1 = expected value, r2 = replacement. r3 = exclusive load of the word */
+ cmp r1, r3              /* mismatch leaves NE set and the branch below skips the barrier and the store */
+ bne REFLSYM(6)
+ dmb                     /* barrier before the store, so earlier accesses are ordered ahead of it */
+ strex r12, r2, [r0]     /* try to store r2; r12 receives 0 on success */
+ cmp r12, #0             /* nonzero status: retry from label 5, the top of this function, not a label of a neighbouring function */
+ bne REFLSYM(5)
+ mov r0, #1              /* stored: result is 1; flags are still EQ so the movne below does nothing */
+ LSYM(6)
+ movne r0, #0            /* NE here only when arriving from the mismatch branch: result is 0 */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_cmpset_rel_32)
+
+#START_64BIT
+START_FUNC(opal_atomic_cmpset_64)
+ push {r4-r7}            /* 64-bit compare-and-set on the doubleword at r0; r4-r7 are saved because they are used as scratch pairs below */
+ ldrd r6, r7, [sp, #16]  /* r6:r7 = doubleword that was at the top of the stack on entry, 16 bytes were just pushed; presumably the 64-bit replacement argument, confirm against the calling convention */
+ LSYM(7)
+ ldrexd r4, r5, [r0]     /* r4:r5 = exclusive load of the doubleword */
+ cmp r4, r2              /* compare first word with r2; r2:r3 is presumably the expected-value argument */
+ cmpeq r5, r3            /* second word is compared only if the first matched; NE after this pair means mismatch */
+ bne REFLSYM(8)
+ strexd r1, r6, r7, [r0] /* try to store r6:r7; r1 receives 0 on success */
+ cmp r1, #0              /* nonzero status: the branch below retries from label 7 */
+ bne REFLSYM(7)
+ mov r0, #1              /* stored: result is 1; flags are still EQ so the movne below does nothing */
+ LSYM(8)
+ movne r0, #0            /* NE here only when arriving from the mismatch branch: result is 0 */
+ pop {r4-r7}             /* restore the saved registers */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_cmpset_64)
+
+START_FUNC(opal_atomic_cmpset_acq_64)
+ push {r4-r7}            /* 64-bit compare-and-set with a trailing barrier; r4-r7 are saved because they are used as scratch pairs below */
+ ldrd r6, r7, [sp, #16]  /* r6:r7 = doubleword that was at the top of the stack on entry; presumably the 64-bit replacement argument, confirm against the calling convention */
+ LSYM(9)
+ ldrexd r4, r5, [r0]     /* r4:r5 = exclusive load of the doubleword at r0 */
+ cmp r4, r2              /* compare first word with r2; r2:r3 is presumably the expected-value argument */
+ cmpeq r5, r3            /* second word is compared only if the first matched; NE after this pair means mismatch */
+ bne REFLSYM(10)
+ strexd r1, r6, r7, [r0] /* try to store r6:r7; r1 receives 0 on success */
+ cmp r1, #0              /* nonzero status: the branch below retries from label 9 */
+ bne REFLSYM(9)
+ dmb                     /* barrier after the successful store; the mismatch path skips it */
+ mov r0, #1              /* stored: result is 1; flags are still EQ so the movne below does nothing */
+ LSYM(10)
+ movne r0, #0            /* NE here only when arriving from the mismatch branch: result is 0 */
+ pop {r4-r7}             /* restore the saved registers */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_cmpset_acq_64)
+
+
+START_FUNC(opal_atomic_cmpset_rel_64)
+ push {r4-r7}            /* 64-bit compare-and-set with a leading barrier; r4-r7 are saved because they are used as scratch pairs below */
+ ldrd r6, r7, [sp, #16]  /* r6:r7 = doubleword that was at the top of the stack on entry; presumably the 64-bit replacement argument, confirm against the calling convention */
+ LSYM(11)
+ ldrexd r4, r5, [r0]     /* r4:r5 = exclusive load of the doubleword at r0 */
+ cmp r4, r2              /* compare first word with r2; r2:r3 is presumably the expected-value argument */
+ cmpeq r5, r3            /* second word is compared only if the first matched; NE after this pair means mismatch */
+ bne REFLSYM(12)
+ dmb                     /* barrier before the store, so earlier accesses are ordered ahead of it */
+ strexd r1, r6, r7, [r0] /* try to store r6:r7; r1 receives 0 on success */
+ cmp r1, #0              /* nonzero status: the branch below retries from label 11 */
+ bne REFLSYM(11)
+ mov r0, #1              /* stored: result is 1; flags are still EQ so the movne below does nothing */
+ LSYM(12)
+ movne r0, #0            /* NE here only when arriving from the mismatch branch: result is 0 */
+ pop {r4-r7}             /* restore the saved registers */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_cmpset_rel_64)
+#END_64BIT
+
+
+START_FUNC(opal_atomic_add_32)
+ LSYM(13)
+ ldrex r2, [r0]          /* atomic add of r1 to the word at r0. r2 = exclusive load of the word */
+ add r2, r2, r1          /* r2 = loaded value plus r1 */
+ strex r3, r2, [r0]      /* try to store the sum; r3 receives 0 on success */
+ cmp r3, #0              /* nonzero status: the branch below reloads and retries from label 13 */
+ bne REFLSYM(13)
+ mov r0, r2              /* result is the new value that was stored, not the previous one */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_add_32)
+
+
+START_FUNC(opal_atomic_sub_32)
+ LSYM(14)
+ ldrex r2, [r0]          /* atomic subtract of r1 from the word at r0. r2 = exclusive load of the word */
+ sub r2, r2, r1          /* r2 = loaded value minus r1 */
+ strex r3, r2, [r0]      /* try to store the difference; r3 receives 0 on success */
+ cmp r3, #0              /* nonzero status: the branch below reloads and retries from label 14 */
+ bne REFLSYM(14)
+ mov r0, r2              /* result is the new value that was stored, not the previous one */
+ bx lr                   /* return to the caller */
+END_FUNC(opal_atomic_sub_32)
Index: ompi-trunk/opal/include/opal/sys/arm/atomic.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/arm/atomic.h (revision 0)
+++ ompi-trunk/opal/include/opal/sys/arm/atomic.h (revision 0)
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2010 IBM Corporation. All rights reserved.
+ * Copyright (c) 2010 ARM ltd. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OMPI_SYS_ARCH_ATOMIC_H
+#define OMPI_SYS_ARCH_ATOMIC_H 1
+
+#if OPAL_WANT_SMP_LOCKS
+
+#define MB() __asm__ __volatile__ ("dmb" : : : "memory")
+#define RMB() __asm__ __volatile__ ("dmb" : : : "memory")
+#define WMB() __asm__ __volatile__ ("dmb" : : : "memory")
+
+#else
+
+#define MB()
+#define RMB()
+#define WMB()
+
+#endif
+
+
+/**********************************************************************
+ *
+ * Define constants for ARMv7
+ *
+ *********************************************************************/
+#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
+
+#define OPAL_HAVE_ATOMIC_CMPSET_32 1
+
+#define OPAL_HAVE_ATOMIC_CMPSET_64 1
+
+#define OPAL_HAVE_ATOMIC_MATH_32 1
+#define OPAL_HAVE_ATOMIC_ADD_32 1
+#define OPAL_HAVE_ATOMIC_SUB_32 1
+
+
+/**********************************************************************
+ *
+ * Memory Barriers
+ *
+ *********************************************************************/
+#if OMPI_GCC_INLINE_ASSEMBLY
+
+static inline
+void opal_atomic_mb(void)
+{ /* Full memory barrier. */
+ MB(); /* "dmb" with a "memory" clobber when OPAL_WANT_SMP_LOCKS is set; expands to nothing otherwise */
+}
+
+
+static inline
+void opal_atomic_rmb(void)
+{ /* Read memory barrier. */
+ RMB(); /* same "dmb" expansion as MB() when OPAL_WANT_SMP_LOCKS is set; expands to nothing otherwise */
+}
+
+
+static inline
+void opal_atomic_wmb(void)
+{ /* Write memory barrier. */
+ WMB(); /* same "dmb" expansion as MB() when OPAL_WANT_SMP_LOCKS is set; expands to nothing otherwise */
+}
+
+
+/**********************************************************************
+ *
+ * Atomic math operations
+ *
+ *********************************************************************/
+
+static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
+ int32_t oldval, int32_t newval)
+{ /* Compare-and-set: stores newval to *addr only if *addr equals oldval. Returns 1 if the value read equalled oldval, 0 otherwise. */
+ int32_t ret, tmp; /* ret: value loaded from *addr; tmp: status written by strex */
+
+ __asm__ __volatile__ (
+ "1: ldrex %0, [%2] \n" /* exclusive load of *addr into ret */
+ " cmp %0, %3 \n" /* compare the loaded value with oldval */
+ " bne 2f \n" /* mismatch: leave without storing */
+ " strex %1, %4, [%2] \n" /* attempt to store newval; tmp is 0 on success */
+ " cmp %1, #0 \n"
+ " bne 1b \n" /* store did not succeed: retry from the load */
+ "2: \n"
+
+ : "=&r" (ret), "=&r" (tmp) /* early-clobber outputs: written before the inputs are fully consumed */
+ : "r" (addr), "r" (oldval), "r" (newval)
+ : "cc", "memory"); /* condition flags are modified; "memory" keeps the compiler from caching memory across the asm */
+
+ return (ret == oldval); /* on the success path ret equals oldval; on the mismatch path it does not */
+}
+
+/* these two functions aren't inlined in the non-gcc case because then
+ there would be two function calls (since neither cmpset_32 nor
+ atomic_?mb can be inlined). Instead, we "inline" them by hand in
+ the assembly, meaning there is one function call overhead instead
+ of two */
+static inline int opal_atomic_cmpset_acq_32(volatile int32_t *addr,
+ int32_t oldval, int32_t newval)
+{ /* opal_atomic_cmpset_32 followed by a read barrier; returns the cmpset result unchanged. */
+ int rc;
+
+ rc = opal_atomic_cmpset_32(addr, oldval, newval);
+ opal_atomic_rmb(); /* issued after the cmpset whether or not the swap happened */
+
+ return rc;
+}
+
+
+static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
+ int32_t oldval, int32_t newval)
+{ /* Write barrier followed by opal_atomic_cmpset_32; returns the cmpset result unchanged. */
+ opal_atomic_wmb(); /* issued before the cmpset is attempted */
+ return opal_atomic_cmpset_32(addr, oldval, newval);
+}
+
+
+static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
+ int64_t oldval, int64_t newval)
+{ /* 64-bit compare-and-set: stores newval to *addr only if *addr equals oldval. Returns 1 if the value read equalled oldval, 0 otherwise. */
+ int64_t ret; /* value loaded from *addr; occupies a register pair referenced as %0 and %H0 */
+ int tmp; /* status written by strexd */
+
+
+ __asm__ __volatile__ (
+ "1: ldrexd %0, %H0, [%2] \n" /* exclusive doubleword load of *addr into ret. NOTE(review): ldrexd/strexd in ARM state need an even-numbered first register; confirm the compiler guarantees that for these operands */
+ " cmp %0, %3 \n" /* compare the first words of ret and oldval */
+ " cmpeq %H0, %H3 \n" /* compare the second words only if the first matched */
+ " bne 2f \n" /* mismatch: leave without storing */
+ " strexd %1, %4, %H4, [%2] \n" /* attempt to store newval; tmp is 0 on success */
+ " cmp %1, #0 \n"
+ " bne 1b \n" /* store did not succeed: retry from the load */
+ "2: \n"
+
+ : "=&r" (ret), "=&r" (tmp) /* early-clobber outputs: written before the inputs are fully consumed */
+ : "r" (addr), "r" (oldval), "r" (newval)
+ : "cc", "memory"); /* condition flags are modified; "memory" keeps the compiler from caching memory across the asm */
+
+ return (ret == oldval); /* on the success path ret equals oldval; on the mismatch path it does not */
+}
+
+/* these two functions aren't inlined in the non-gcc case because then
+ there would be two function calls (since neither cmpset_64 nor
+ atomic_?mb can be inlined). Instead, we "inline" them by hand in
+ the assembly, meaning there is one function call overhead instead
+ of two */
+static inline int opal_atomic_cmpset_acq_64(volatile int64_t *addr,
+ int64_t oldval, int64_t newval)
+{ /* opal_atomic_cmpset_64 followed by a read barrier; returns the cmpset result unchanged. */
+ int rc;
+
+ rc = opal_atomic_cmpset_64(addr, oldval, newval);
+ opal_atomic_rmb(); /* issued after the cmpset whether or not the swap happened */
+
+ return rc;
+}
+
+
+static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
+ int64_t oldval, int64_t newval)
+{ /* Write barrier followed by opal_atomic_cmpset_64; returns the cmpset result unchanged. */
+ opal_atomic_wmb(); /* issued before the cmpset is attempted */
+ return opal_atomic_cmpset_64(addr, oldval, newval);
+}
+
+
+static inline int32_t opal_atomic_add_32(volatile int32_t* v, int inc)
+{ /* Atomically adds inc to *v and returns the new value that was stored. */
+ int32_t t; /* loaded value, then the sum */
+ int tmp; /* status written by strex */
+
+ __asm__ __volatile__(
+ "1: ldrex %0, [%2] \n" /* exclusive load of *v into t */
+ " add %0, %0, %3 \n" /* t = t + inc */
+ " strex %1, %0, [%2] \n" /* attempt to store t; tmp is 0 on success */
+ " cmp %1, #0 \n"
+ " bne 1b \n" /* store did not succeed: reload and retry */
+
+ : "=&r" (t), "=&r" (tmp) /* early-clobber outputs: written before the inputs are fully consumed */
+ : "r" (v), "r" (inc)
+ : "cc", "memory"); /* condition flags are modified; "memory" keeps the compiler from caching memory across the asm */
+
+
+ return t; /* post-add value, not the value before the add */
+}
+
+
+static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int dec)
+{ /* Atomically subtracts dec from *v and returns the new value that was stored. */
+ int32_t t; /* loaded value, then the difference */
+ int tmp; /* status written by strex */
+
+ __asm__ __volatile__(
+ "1: ldrex %0, [%2] \n" /* exclusive load of *v into t */
+ " sub %0, %0, %3 \n" /* t = t - dec */
+ " strex %1, %0, [%2] \n" /* attempt to store t; tmp is 0 on success */
+ " cmp %1, #0 \n"
+ " bne 1b \n" /* store did not succeed: reload and retry */
+
+ : "=&r" (t), "=&r" (tmp) /* early-clobber outputs: written before the inputs are fully consumed */
+ : "r" (v), "r" (dec)
+ : "cc", "memory"); /* condition flags are modified; "memory" keeps the compiler from caching memory across the asm */
+
+ return t; /* post-subtract value, not the value before the subtract */
+}
+
+
+#endif /* OMPI_GCC_INLINE_ASSEMBLY */
+
+#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
Index: ompi-trunk/opal/include/opal/sys/arm/timer.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/arm/timer.h (revision 0)
+++ ompi-trunk/opal/include/opal/sys/arm/timer.h (revision 0)
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2008 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OMPI_SYS_ARCH_TIMER_H
+#define OMPI_SYS_ARCH_TIMER_H 1
+
+#include <sys/times.h>
+
+typedef uint64_t opal_timer_t;
+
+static inline opal_timer_t
+opal_sys_timer_get_cycles(void)
+{ /* Returns the sum of the user and system CPU times reported by times() for the calling process. */
+ opal_timer_t ret;
+ struct tms accurate_clock;
+
+ times(&accurate_clock); /* return value is not checked */
+ ret = accurate_clock.tms_utime + accurate_clock.tms_stime; /* NOTE(review): POSIX reports these fields in clock ticks, so despite the function name this is not a CPU cycle count */
+
+ return ret;
+}
+
+#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
+
+#endif /* ! OMPI_SYS_ARCH_TIMER_H */
Index: ompi-trunk/opal/include/opal/sys/arm/Makefile.am
===================================================================
--- ompi-trunk/opal/include/opal/sys/arm/Makefile.am (revision 0)
+++ ompi-trunk/opal/include/opal/sys/arm/Makefile.am (revision 0)
@@ -0,0 +1,24 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2008 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am
+
+headers += \
+ opal/sys/arm/atomic.h \
+ opal/sys/arm/timer.h
+
Index: ompi-trunk/opal/include/opal/sys/arm/update.sh
===================================================================
--- ompi-trunk/opal/include/opal/sys/arm/update.sh (revision 0)
+++ ompi-trunk/opal/include/opal/sys/arm/update.sh (revision 0)
@@ -0,0 +1,37 @@
+#!/bin/sh
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+CFILE=/tmp/opal_atomic_$$.c   # scratch C file, named after this shell's PID
+
+trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15   # delete the scratch file on exit and on signals 1, 2 and 15
+
+echo Updating atomic.s from atomic.h using gcc
+
+cat > $CFILE<<EOF
+#include <stdlib.h>
+#include <inttypes.h>
+#define static
+#define inline
+#define OMPI_GCC_INLINE_ASSEMBLY 1
+#define OPAL_WANT_SMP_LOCKS 1
+#include "../architecture.h"
+#include "atomic.h"
+EOF
+
+gcc -O1 -I. -S $CFILE -o atomic.s   # -S stops after compilation, leaving assembly for the functions of atomic.h in atomic.s
Index: ompi-trunk/opal/include/opal/sys/atomic.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/atomic.h (revision 24191)
+++ ompi-trunk/opal/include/opal/sys/atomic.h (working copy)
@@ -146,6 +146,8 @@
 #include "opal/sys/alpha/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64
 #include "opal/sys/amd64/atomic.h"
+#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM
+#include "opal/sys/arm/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_IA32
 #include "opal/sys/ia32/atomic.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_IA64
Index: ompi-trunk/opal/include/opal/sys/timer.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/timer.h (revision 24191)
+++ ompi-trunk/opal/include/opal/sys/timer.h (working copy)
@@ -79,6 +79,8 @@
 /* don't include system-level gorp when generating doxygen files */
 #elif OPAL_ASSEMBLY_ARCH == OMPI_AMD64
 #include "opal/sys/amd64/timer.h"
+#elif OPAL_ASSEMBLY_ARCH == OMPI_ARM
+#include "opal/sys/arm/timer.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_IA32
 #include "opal/sys/ia32/timer.h"
 #elif OPAL_ASSEMBLY_ARCH == OMPI_IA64
Index: ompi-trunk/opal/include/opal/sys/architecture.h
===================================================================
--- ompi-trunk/opal/include/opal/sys/architecture.h (revision 24191)
+++ ompi-trunk/opal/include/opal/sys/architecture.h (working copy)
@@ -36,6 +36,7 @@
 #define OMPI_SPARCV9_32 0061
 #define OMPI_SPARCV9_64 0062
 #define OMPI_MIPS 0070
+#define OMPI_ARM 0100
 
 /* Formats */
 #define OMPI_DEFAULT 1000 /* standard for given architecture */
Index: ompi-trunk/opal/config/opal_config_asm.m4
===================================================================
--- ompi-trunk/opal/config/opal_config_asm.m4 (revision 24191)
+++ ompi-trunk/opal/config/opal_config_asm.m4 (working copy)
@@ -900,6 +900,12 @@
             OMPI_GCC_INLINE_ASSIGN='"bis [$]31,[$]31,%0" : "=&r"(ret)'
             ;;
 
+ armv7*)
+ ompi_cv_asm_arch="ARM"
+ OPAL_ASM_SUPPORT_64BIT=1
+ OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
+ ;;
+
         mips-*|mips64*)
             # Should really find some way to make sure that we are on
             # a MIPS III machine (r4000 and later)