Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] amd64 atomic.h warnings
From: Jeff Squyres (jsquyres_at_[hidden])
Date: 2010-06-08 10:42:30


Look at my output -- I did...

On Jun 8, 2010, at 10:40 AM, George Bosilca wrote:

> Still no good; opal_atomic_cmpset_32 is not inlined. Try adding -O3 to your command line; this helped for gcc.
>
> Thanks,
> george.
>
> On Jun 8, 2010, at 14:14 , Jeff Squyres wrote:
>
> > On Jun 8, 2010, at 9:53 AM, George Bosilca wrote:
> >
> >> As you can see, there is no explicit call; opal_atomic_cmpset_32 is really inlined. I think the problem is that you didn't specify the -O3 flag on your command line.
> >
> > Ah, you wanted me to compile the OMPI code itself and send you the assembly. That's not what you asked for. :-)
> >
> > (I just took the code you sent in the mail, stuffed it into george.c, and compiled that with -S -- outside of the context of the Open MPI code tree)
> >
> > Here's the new output. It still didn't inline, but you can see the code for the _cmpset function:
> >
> > -----
> > [7:13] svbu-mpi:~/tmp % cat george.c
> > #include <stdint.h>
> >
> > #include "opal/sys/atomic.h"
> >
> > int foo(void) {
> > int32_t oldval, delta;
> > int32_t *addr = 0;
> > do {
> > oldval = *addr;
> > } while (0 == opal_atomic_cmpset_32(addr, oldval, oldval + delta));
> > return (oldval + delta);
> > }
> >
> > [7:13] svbu-mpi:~/tmp % pgcc -O3 -I /home/jsquyres/svn/ompi4 -I/home/jsquyres/svn/ompi4/opal/include -c -s george.c
> > [7:13] svbu-mpi:~/tmp % cat george.s
> > .file "george.c"
> > .version "01.01"
> > ## PGC 7.0 -opt 1
> > ## PGC 06/08/2010 05:10:04
> > ## pgcc george.c -c -S
> > ## /opt/pgi/7.0.7/linux86-64/7.0-7/bin/pgc
> > ## george.c -opt 1 -terse 1 -inform warn -x 119 0xa10000 -x 122 0x40 -x 123 0x1000
> > ## -x 127 4 -x 127 16 -x 19 0x400000 -x 28 0x40000 -x 70 0x8000 -x 122 1 -quad
> > ## -x 59 4 -x 59 4 -tp p7-64 -astype 0 -stdinc /opt/pgi/7.0.7/linux86-64/7.0-7/include:/usr/local/include:/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include:/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include:/usr/include
> > ## -def unix -def __unix -def __unix__ -def linux -def __linux -def __linux__
> > ## -def __NO_MATH_INLINES -def __x86_64__ -def __LONG_MAX__=9223372036854775807L
> > ## -def __SIZE_TYPE__=unsigned long int -def __PTRDIFF_TYPE__=long int -def __THROW=
> > ## -def __extension__= -def __amd64__ -def __SSE__ -def __MMX__ -def __SSE2__
> > ## -def __SSE3__ -predicate #machine(x86_64) #lint(off) #system(posix) #cpu(x86_64)
> > ## -cmdline +pgcc george.c -c -S -x 123 4 -x 123 0x80000000 -alwaysinline /opt/pgi/7.0.7/linux86-64/7.0-7/lib/libintrinsics.il 4
> > ## -asm george.s
> > ## lineno: 3
> > .text
> > .align 16
> > .globl foo
> > foo:
> > ..Dcfb0:
> > pushq %rbp
> > ..Dcfi0:
> > movq %rsp, %rbp
> > ..Dcfi1:
> > subq $16, %rsp
> > ..EN1:
> > ## lineno: 5
> > movq $0, -8(%rbp)
> > .p2align 4,,3
> > .LB157:
> > ## lineno: 6
> > movq -8(%rbp), %rdi
> > movl (%rdi), %esi
> > movl %esi, -12(%rbp)
> > movl -16(%rbp), %edx
> > addl %esi, %edx
> > xorl %eax, %eax
> > call opal_atomic_cmpset_32
> > testl %eax, %eax
> > je .LB157
> > movl -16(%rbp), %eax
> > addl -12(%rbp), %eax
> > ## lineno: 10
> > leave
> > ret
> > .type foo,@function
> > .size foo,.-foo
> > ..Dcfe0:
> > __fooEND:
> > .section .pgi_trace
> > .align 8
> > .quad foo ## address of routine
> > .quad __fooEND - foo ## size of routine
> > .2byte 0 ## flags for future use
> > .2byte 3 ## length of following string
> > ## name:foo:
> > .byte 0x66,0x6f,0x6f,0x00
> > .data
> > .globl opal_atomic_cmpset_32
> > .section .debug_frame
> > ..Dcieb0:
> > .4byte ..Dciee0-..Dcieb0-4 ## CIE length
> > .4byte 0xffffffff ## CIE ID
> > .byte 0x1 ## CIE version
> > .byte 0x0 ## no augmentation
> > .byte 0x1 ## ULEB128 1, code alignment factor
> > .byte 0x78 ## SLEB128 -8, data alignment factor
> > .byte 0x10 ## return address column
> > .byte 0xc ## DW_CFA_def_cfa (col 7)
> > .byte 0x7 ## ULEB128 7
> > .byte 0x8 ## ULEB128 8
> > .byte 0x90 ## DW_CFA_offset (col 16)
> > .byte 0x1 ## ULEB128 1
> > .align 8
> > ..Dciee0:
> > .4byte ..Dfdee0-..Dfdeb0 ## FDE length
> > ..Dfdeb0:
> > .4byte ..Dcieb0 ## CIE pointer
> > .quad ..Dcfb0 ## initial location
> > .quad ..Dcfe0-..Dcfb0 ## address range
> > .byte 0x4 ## DW_CFA_advance_loc4
> > .4byte ..Dcfi0-..Dcfb0
> > .byte 0xe ## DW_CFA_def_cfa_offset
> > .byte 0x10 ## ULEB128 16
> > .byte 0x86 ## DW_CFA_offset (col 6)
> > .byte 0x2 ## ULEB128 2
> > .byte 0x4 ## DW_CFA_advance_loc4
> > .4byte ..Dcfi1-..Dcfi0
> > .byte 0xd ## DW_CFA_def_cfa_register (col 6)
> > .byte 0x6 ## ULEB128 6
> > .align 8
> > ..Dfdee0:
> > .ident "PGC 7.0-7"
> > [7:13] svbu-mpi:~/tmp %
> > -----
> >
> > --
> > Jeff Squyres
> > jsquyres_at_[hidden]
> > For corporate legal information go to:
> > http://www.cisco.com/web/about/doing_business/legal/cri/
> >
> >
> > _______________________________________________
> > devel mailing list
> > devel_at_[hidden]
> > http://www.open-mpi.org/mailman/listinfo.cgi/devel
>
>
> _______________________________________________
> devel mailing list
> devel_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>

-- 
Jeff Squyres
jsquyres_at_[hidden]
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/