Open MPI logo

Hardware Locality Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Hardware Locality Development mailing list

Subject: Re: [hwloc-devel] [hwloc-svn] svn:hwloc r2107
From: Jeff Squyres (jsquyres_at_[hidden])
Date: 2010-05-19 11:18:41


Bring it on!
:-)

On May 19, 2010, at 11:07 AM, Brice Goglin wrote:

> This branch is pretty much ready to merge into trunk for 1.1.
> If somebody doesn't like it, please complain!
> I'll merge by the end of the week otherwise.
>
> Brice
>
>
>
>
> On 19/05/2010 17:01, bgoglin_at_[hidden] wrote:
> > Author: bgoglin
> > Date: 2010-05-19 11:01:42 EDT (Wed, 19 May 2010)
> > New Revision: 2107
> > URL: https://svn.open-mpi.org/trac/hwloc/changeset/2107
> >
> > Log:
> > Deoptimize many cpuset routines by using HWLOC_CPUSUBSET_READULONG()
> > all the time instead of manually handling the cases where a ulong index
> > is valid in one of the input cpusets and/or the other.
> >
> > The performance gain wasn't so huge (0-20%), and the code was waaaaay
> > harder to read/maintain. And things should be much easier to deal
> > with when we add sparse cpuset support.
> > Text files modified:
> > branches/dyncpusets/src/cpuset.c | 127 ++++++---------------------------------
> > 1 files changed, 22 insertions(+), 105 deletions(-)
> >
> > Modified: branches/dyncpusets/src/cpuset.c
> > ==============================================================================
> > --- branches/dyncpusets/src/cpuset.c (original)
> > +++ branches/dyncpusets/src/cpuset.c 2010-05-19 11:01:42 EDT (Wed, 19 May 2010)
> > @@ -553,24 +553,13 @@
> >
> > int hwloc_cpuset_isequal (const struct hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> > {
> > - unsigned long val;
> > unsigned i;
> >
> > HWLOC__CPUSET_CHECK(set1);
> > HWLOC__CPUSET_CHECK(set2);
> >
> > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > - if (set1->ulongs[i] != set2->ulongs[i])
> > - return 0;
> > -
> > - val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<set2->ulongs_count; i++)
> > - if (set2->ulongs[i] != val)
> > - return 0;
> > -
> > - val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<set1->ulongs_count; i++)
> > - if (set1->ulongs[i] != val)
> > + for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++)
> > + if (HWLOC_CPUSUBSET_READULONG(set1, i) != HWLOC_CPUSUBSET_READULONG(set2, i))
> > return 0;
> >
> > if (set1->infinite != set2->infinite)
> > @@ -581,26 +570,15 @@
> >
> > int hwloc_cpuset_intersects (const struct hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> > {
> > - unsigned long val;
> > unsigned i;
> >
> > HWLOC__CPUSET_CHECK(set1);
> > HWLOC__CPUSET_CHECK(set2);
> >
> > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > - if ((set1->ulongs[i] & set2->ulongs[i]) != HWLOC_CPUSUBSET_ZERO)
> > + for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++)
> > + if ((HWLOC_CPUSUBSET_READULONG(set1, i) & HWLOC_CPUSUBSET_READULONG(set2, i)) != HWLOC_CPUSUBSET_ZERO)
> > return 1;
> >
> > - val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<set2->ulongs_count; i++)
> > - if ((set2->ulongs[i] & val) != HWLOC_CPUSUBSET_ZERO)
> > - return 0;
> > -
> > - val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<set1->ulongs_count; i++)
> > - if ((set1->ulongs[i] & val) != HWLOC_CPUSUBSET_ZERO)
> > - return 0;
> > -
> > if (set1->infinite && set2->infinite)
> > return 0;
> >
> > @@ -614,12 +592,8 @@
> > HWLOC__CPUSET_CHECK(sub_set);
> > HWLOC__CPUSET_CHECK(super_set);
> >
> > - for(i=0; i<sub_set->ulongs_count && i<super_set->ulongs_count; i++)
> > - if (super_set->ulongs[i] != (super_set->ulongs[i] | sub_set->ulongs[i]))
> > - return 0;
> > -
> > - for(; i<sub_set->ulongs_count; i++)
> > - if (sub_set->ulongs[i] != HWLOC_CPUSUBSET_ZERO && !super_set->infinite)
> > + for(i=0; i<sub_set->ulongs_count; i++)
> > + if (HWLOC_CPUSUBSET_READULONG(super_set, i) != (HWLOC_CPUSUBSET_READULONG(super_set, i) | HWLOC_CPUSUBSET_READULONG(sub_set, i)))
> > return 0;
> >
> > if (sub_set->infinite && !super_set->infinite)
> > @@ -631,8 +605,6 @@
> > void hwloc_cpuset_or (struct hwloc_cpuset_s *res, const struct hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> > {
> > const struct hwloc_cpuset_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
> > - const struct hwloc_cpuset_s *smallest = set1->ulongs_count > set2->ulongs_count ? set2 : set1;
> > - unsigned long val;
> > unsigned i;
> >
> > HWLOC__CPUSET_CHECK(res);
> > @@ -641,16 +613,8 @@
> >
> > hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot reset since the output may also be an input */
> >
> > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > - res->ulongs[i] = set1->ulongs[i] | set2->ulongs[i];
> > -
> > - val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<largest->ulongs_count; i++)
> > - res->ulongs[i] = val | largest->ulongs[i];
> > -
> > - val |= largest->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<res->ulongs_count; i++)
> > - res->ulongs[i] = val;
> > + for(i=0; i<res->ulongs_count; i++)
> > + res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) | HWLOC_CPUSUBSET_READULONG(set2, i);
> >
> > res->infinite = set1->infinite || set2->infinite;
> > }
> > @@ -658,8 +622,6 @@
> > void hwloc_cpuset_and (struct hwloc_cpuset_s *res, const struct hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> > {
> > const struct hwloc_cpuset_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
> > - const struct hwloc_cpuset_s *smallest = set1->ulongs_count > set2->ulongs_count ? set2 : set1;
> > - unsigned long val;
> > unsigned i;
> >
> > HWLOC__CPUSET_CHECK(res);
> > @@ -668,16 +630,8 @@
> >
> > hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot reset since the output may also be an input */
> >
> > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > - res->ulongs[i] = set1->ulongs[i] & set2->ulongs[i];
> > -
> > - val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<largest->ulongs_count; i++)
> > - res->ulongs[i] = val & largest->ulongs[i];
> > -
> > - val &= largest->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<res->ulongs_count; i++)
> > - res->ulongs[i] = val;
> > + for(i=0; i<res->ulongs_count; i++)
> > + res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) & HWLOC_CPUSUBSET_READULONG(set2, i);
> >
> > res->infinite = set1->infinite && set2->infinite;
> > }
> > @@ -685,8 +639,6 @@
> > void hwloc_cpuset_andnot (struct hwloc_cpuset_s *res, const struct hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> > {
> > const struct hwloc_cpuset_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
> > - const struct hwloc_cpuset_s *smallest = set1->ulongs_count > set2->ulongs_count ? set2 : set1;
> > - unsigned long val;
> > unsigned i;
> >
> > HWLOC__CPUSET_CHECK(res);
> > @@ -695,16 +647,8 @@
> >
> > hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot reset since the output may also be an input */
> >
> > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > - res->ulongs[i] = set1->ulongs[i] & ~set2->ulongs[i];
> > -
> > - val = (!smallest->infinite) != (smallest != set2) ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<largest->ulongs_count; i++)
> > - res->ulongs[i] = val & largest->ulongs[i];
> > -
> > - val &= (!largest->infinite) != (largest != set2) ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<res->ulongs_count; i++)
> > - res->ulongs[i] = val;
> > + for(i=0; i<res->ulongs_count; i++)
> > + res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) & ~HWLOC_CPUSUBSET_READULONG(set2, i);
> >
> > res->infinite = set1->infinite && !set2->infinite;
> > }
> > @@ -712,8 +656,6 @@
> > void hwloc_cpuset_xor (struct hwloc_cpuset_s *res, const struct hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> > {
> > const struct hwloc_cpuset_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
> > - const struct hwloc_cpuset_s *smallest = set1->ulongs_count > set2->ulongs_count ? set2 : set1;
> > - unsigned long val;
> > unsigned i;
> >
> > HWLOC__CPUSET_CHECK(res);
> > @@ -722,23 +664,14 @@
> >
> > hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot reset since the output may also be an input */
> >
> > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > - res->ulongs[i] = set1->ulongs[i] ^ set2->ulongs[i];
> > -
> > - val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<largest->ulongs_count; i++)
> > - res->ulongs[i] = val ^ largest->ulongs[i];
> > -
> > - val ^= largest->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(; i<res->ulongs_count; i++)
> > - res->ulongs[i] = val;
> > + for(i=0; i<res->ulongs_count; i++)
> > + res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) ^ HWLOC_CPUSUBSET_READULONG(set2, i);
> >
> > res->infinite = (!set1->infinite) != (!set2->infinite);
> > }
> >
> > void hwloc_cpuset_not (struct hwloc_cpuset_s *res, const struct hwloc_cpuset_s *set)
> > {
> > - unsigned long val;
> > unsigned i;
> >
> > HWLOC__CPUSET_CHECK(res);
> > @@ -746,12 +679,8 @@
> >
> > hwloc_cpuset_realloc_by_ulongs(res, set->ulongs_count); /* cannot reset since the output may also be an input */
> >
> > - for(i=0; i<set->ulongs_count; i++)
> > - res->ulongs[i] = ~set->ulongs[i];
> > -
> > - val = set->infinite ? HWLOC_CPUSUBSET_ZERO : HWLOC_CPUSUBSET_FULL;
> > - for(; i<res->ulongs_count; i++)
> > - res->ulongs[i] = val;
> > + for(i=0; i<res->ulongs_count; i++)
> > + res->ulongs[i] = ~HWLOC_CPUSUBSET_READULONG(set, i);
> >
> > res->infinite = !set->infinite;
> > }
> > @@ -884,7 +813,7 @@
> >
> > int hwloc_cpuset_compare(const struct hwloc_cpuset_s * set1, const struct hwloc_cpuset_s * set2)
> > {
> > - unsigned long val;
> > + const struct hwloc_cpuset_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
> > int i;
> >
> > HWLOC__CPUSET_CHECK(set1);
> > @@ -893,24 +822,12 @@
> > if ((!set1->infinite) != (!set2->infinite))
> > return !!set1->infinite - !!set2->infinite;
> >
> > - val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(i=set1->ulongs_count-1; (unsigned) i>=set2->ulongs_count; i--) {
> > - if (set1->ulongs[i] == val)
> > - continue;
> > - return set1->ulongs[i] < val ? -1 : 1;
> > - }
> > -
> > - val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > - for(i=set2->ulongs_count-1; (unsigned) i>=set1->ulongs_count; i--) {
> > - if (val == set2->ulongs[i])
> > - continue;
> > - return val < set2->ulongs[i] ? -1 : 1;
> > - }
> > -
> > - for(i=(set2->ulongs_count > set1->ulongs_count ? set1->ulongs_count : set2->ulongs_count)-1; i>=0; i--) {
> > - if (set1->ulongs[i] == set2->ulongs[i])
> > + for(i=largest->ulongs_count-1; i>=0; i--) {
> > + unsigned long val1 = HWLOC_CPUSUBSET_READULONG(set1, i);
> > + unsigned long val2 = HWLOC_CPUSUBSET_READULONG(set2, i);
> > + if (val1 == val2)
> > continue;
> > - return set1->ulongs[i] < set2->ulongs[i] ? -1 : 1;
> > + return val1 < val2 ? -1 : 1;
> > }
> >
> > return 0;
> > _______________________________________________
> > hwloc-svn mailing list
> > hwloc-svn_at_[hidden]
> > http://www.open-mpi.org/mailman/listinfo.cgi/hwloc-svn
> >
>
> _______________________________________________
> hwloc-devel mailing list
> hwloc-devel_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/hwloc-devel
>

-- 
Jeff Squyres
jsquyres_at_[hidden]
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/