Skip to content
Snippets Groups Projects
Commit a2f961bd authored by Robert Mijakovic's avatar Robert Mijakovic Committed by Adam J. Stewart
Browse files

Adds MPI_THREAD_MULTIPLE support for OpenMPI to the variant with UCX fabrics (#14194)

* Fixes:
1. MPI_THREAD_MULTIPLE problem with OpenMPI and UCX.

Changes:
1. OpenMPI gains two new depends_on directives that cause UCX to be compiled with multi-threading support: an implicit one for OpenMPI 3.x and later, where MPI_THREAD_MULTIPLE is enabled by default, and an explicit one (requested via +thread_multiple) for OpenMPI <= 2.x, where MPI_THREAD_MULTIPLE is disabled by default.
2. Extends UCX package to allow "Enable thread support in UCP and UCT" option.
3. Adds sha256 sums of UCX releases 1.6.1 and 1.2.0.

More details:
Fixes the issue with OpenMPI where programs which use MPI_THREAD_MULTIPLE will fail to execute because UCP worker didn't support it.
During the OpenMPI package installation, the +thread_multiple spec was neither propagated to UCX nor handled by the UCX package at all.
Now, the OpenMPI package handles the +thread_multiple spec when UCX is requested, and the UCX package correctly handles +thread_multiple by compiling with the --enable-mt option.
Error message during runtime:
pml_ucx.c:226 Error: UCP worker does not support MPI_THREAD_MULTIPLE

* Adapts the spec check to the form suggested in the docs, which also reads better.

* Explicitly disables multithreading of UCX if +thread_multiple option is not used.
parent 0232c820
No related branches found
No related tags found
No related merge requests found
...@@ -96,6 +96,7 @@ class Openmpi(AutotoolsPackage): ...@@ -96,6 +96,7 @@ class Openmpi(AutotoolsPackage):
version('3.0.1', sha256='663450d1ee7838b03644507e8a76edfb1fba23e601e9e0b5b2a738e54acd785d') # libmpi.so.40.00.1 version('3.0.1', sha256='663450d1ee7838b03644507e8a76edfb1fba23e601e9e0b5b2a738e54acd785d') # libmpi.so.40.00.1
version('3.0.0', sha256='f699bff21db0125d8cccfe79518b77641cd83628725a1e1ed3e45633496a82d7') # libmpi.so.40.00.0 version('3.0.0', sha256='f699bff21db0125d8cccfe79518b77641cd83628725a1e1ed3e45633496a82d7') # libmpi.so.40.00.0
# Retired
version('2.1.6', sha256='98b8e1b8597bbec586a0da79fcd54a405388190247aa04d48e8c40944d4ca86e') # libmpi.so.20.10.3 version('2.1.6', sha256='98b8e1b8597bbec586a0da79fcd54a405388190247aa04d48e8c40944d4ca86e') # libmpi.so.20.10.3
version('2.1.5', sha256='b807ccab801f27c3159a5edf29051cd3331d3792648919f9c4cee48e987e7794') # libmpi.so.20.10.3 version('2.1.5', sha256='b807ccab801f27c3159a5edf29051cd3331d3792648919f9c4cee48e987e7794') # libmpi.so.20.10.3
version('2.1.4', sha256='3e03695ca8bd663bc2d89eda343c92bb3d4fc79126b178f5ddcb68a8796b24e2') # libmpi.so.20.10.3 version('2.1.4', sha256='3e03695ca8bd663bc2d89eda343c92bb3d4fc79126b178f5ddcb68a8796b24e2') # libmpi.so.20.10.3
...@@ -104,7 +105,6 @@ class Openmpi(AutotoolsPackage): ...@@ -104,7 +105,6 @@ class Openmpi(AutotoolsPackage):
version('2.1.1', sha256='bd7badd4ff3afa448c0d7f3ca0ee6ce003b957e9954aa87d8e4435759b5e4d16') # libmpi.so.20.10.1 version('2.1.1', sha256='bd7badd4ff3afa448c0d7f3ca0ee6ce003b957e9954aa87d8e4435759b5e4d16') # libmpi.so.20.10.1
version('2.1.0', sha256='b169e15f5af81bf3572db764417670f508c0df37ce86ff50deb56bd3acb43957') # libmpi.so.20.10.0 version('2.1.0', sha256='b169e15f5af81bf3572db764417670f508c0df37ce86ff50deb56bd3acb43957') # libmpi.so.20.10.0
# Retired
version('2.0.4', sha256='4f82d5f7f294becbd737319f74801206b08378188a95b70abe706fdc77a0c20b') # libmpi.so.20.0.4 version('2.0.4', sha256='4f82d5f7f294becbd737319f74801206b08378188a95b70abe706fdc77a0c20b') # libmpi.so.20.0.4
version('2.0.3', sha256='b52c0204c0e5954c9c57d383bb22b4181c09934f97783292927394d29f2a808a') # libmpi.so.20.0.3 version('2.0.3', sha256='b52c0204c0e5954c9c57d383bb22b4181c09934f97783292927394d29f2a808a') # libmpi.so.20.0.3
version('2.0.2', sha256='cae396e643f9f91f0a795f8d8694adf7bacfb16f967c22fb39e9e28d477730d3') # libmpi.so.20.0.2 version('2.0.2', sha256='cae396e643f9f91f0a795f8d8694adf7bacfb16f967c22fb39e9e28d477730d3') # libmpi.so.20.0.2
...@@ -273,6 +273,8 @@ class Openmpi(AutotoolsPackage): ...@@ -273,6 +273,8 @@ class Openmpi(AutotoolsPackage):
depends_on('zlib', when='@3.0.0:') depends_on('zlib', when='@3.0.0:')
depends_on('valgrind~mpi', when='+memchecker') depends_on('valgrind~mpi', when='+memchecker')
depends_on('ucx', when='fabrics=ucx') depends_on('ucx', when='fabrics=ucx')
# UCX must itself be built with multi-threading support, otherwise programs
# that use MPI_THREAD_MULTIPLE fail at run time with
# "UCP worker does not support MPI_THREAD_MULTIPLE".
# Explicit case: the user requested +thread_multiple on OpenMPI.
depends_on('ucx +thread_multiple', when='fabrics=ucx +thread_multiple')
# Implicit case: per the accompanying commit message, OpenMPI 3.x enables
# MPI_THREAD_MULTIPLE by default, so 3.0.0 and later always require it.
depends_on('ucx +thread_multiple', when='@3.0.0: fabrics=ucx')
depends_on('libfabric', when='fabrics=libfabric') depends_on('libfabric', when='fabrics=libfabric')
depends_on('slurm', when='schedulers=slurm') depends_on('slurm', when='schedulers=slurm')
depends_on('lsf', when='schedulers=lsf') depends_on('lsf', when='schedulers=lsf')
......
...@@ -14,6 +14,7 @@ class Ucx(AutotoolsPackage): ...@@ -14,6 +14,7 @@ class Ucx(AutotoolsPackage):
url = "https://github.com/openucx/ucx/releases/download/v1.3.1/ucx-1.3.1.tar.gz" url = "https://github.com/openucx/ucx/releases/download/v1.3.1/ucx-1.3.1.tar.gz"
# Current # Current
version('1.6.1', sha256='1425648aa03f5fa40e4bc5c4a5a83fe0292e2fe44f6054352fbebbf6d8f342a1')
version('1.6.0', sha256='360e885dd7f706a19b673035a3477397d100a02eb618371697c7f3ee4e143e2c') version('1.6.0', sha256='360e885dd7f706a19b673035a3477397d100a02eb618371697c7f3ee4e143e2c')
version('1.5.2', sha256='1a333853069860e86ba69b8d071ccc9871209603790e2b673ec61f8086913fad') version('1.5.2', sha256='1a333853069860e86ba69b8d071ccc9871209603790e2b673ec61f8086913fad')
version('1.5.1', sha256='567119cd80ad2ae6968ecaa4bd1d2a80afadd037ccc988740f668de10d2fdb7e') version('1.5.1', sha256='567119cd80ad2ae6968ecaa4bd1d2a80afadd037ccc988740f668de10d2fdb7e')
...@@ -25,6 +26,19 @@ class Ucx(AutotoolsPackage): ...@@ -25,6 +26,19 @@ class Ucx(AutotoolsPackage):
version('1.3.0', sha256='71e69e6d78a4950cc5a1edcbe59bf7a8f8e38d59c9f823109853927c4d442952') version('1.3.0', sha256='71e69e6d78a4950cc5a1edcbe59bf7a8f8e38d59c9f823109853927c4d442952')
version('1.2.2', sha256='914d10fee8f970d4fb286079dd656cf8a260ec7d724d5f751b3109ed32a6da63') version('1.2.2', sha256='914d10fee8f970d4fb286079dd656cf8a260ec7d724d5f751b3109ed32a6da63')
version('1.2.1', sha256='fc63760601c03ff60a2531ec3c6637e98f5b743576eb410f245839c84a0ad617') version('1.2.1', sha256='fc63760601c03ff60a2531ec3c6637e98f5b743576eb410f245839c84a0ad617')
version('1.2.0', sha256='1e1a62d6d0f89ce59e384b0b5b30b416b8fd8d7cedec4182a5319d0dfddf649c')
# Off by default; when enabled, configure_args passes --enable-mt.
variant(
    'thread_multiple',
    default=False,
    description='Enable thread support in UCP and UCT',
)
depends_on('numactl') depends_on('numactl')
depends_on('rdma-core') depends_on('rdma-core')
def configure_args(self):
    """Return the ``configure`` flags that control UCX multi-threading.

    Reads ``self.spec`` and checks whether it contains ``+thread_multiple``.

    Returns:
        list: ``['--enable-mt']`` when ``+thread_multiple`` is in the spec,
        otherwise ``['--disable-mt']``.
    """
    # Always pass an explicit flag: multi-threading is switched off unless
    # the variant asks for it, instead of relying on UCX's own default.
    mt_enabled = '+thread_multiple' in self.spec
    return ['--enable-mt' if mt_enabled else '--disable-mt']
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment