File numactl-2.0.14.20.g4ee5e0c.obscpio of Package numactl.21265
07070100000000000081A400003EA60000003200000001612720430000011B000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/.packit.yamldownstream_package_name: numactl
jobs:
- job: copr_build
metadata:
targets:
- fedora-30-x86_64
- fedora-31-x86_64
- fedora-rawhide-x86_64
trigger: pull_request
specfile_path: numactl.spec
synced_files:
- numactl.spec
- .packit.yaml
upstream_package_name: numactl
07070100000001000081A400003EA6000000320000000161272043000006E8000000000000000000000000000000000000002700000000numactl-2.0.14.20.g4ee5e0c/.travis.ymldist: bionic
language: c
env:
global:
- MAKEFLAGS="-j 2" # parallelize compilation process
matrix:
include:
- name: "gcc 10"
env: MATRIX_ENV="CC=gcc-10 CXX=g++-10"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-10
- name: "gcc 9"
env: MATRIX_ENV="CC=gcc-9 CXX=g++-9"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-9
- name: "gcc 8"
env: MATRIX_ENV="CC=gcc-8 CXX=g++-8"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-8
- name: "gcc 7"
env: MATRIX_ENV="CC=gcc-7 CXX=g++-7"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-7
- name: "gcc 6"
env: MATRIX_ENV="CC=gcc-6 CXX=g++-6"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-6
- name: "gcc 5"
dist: xenial
env: MATRIX_ENV="CC=gcc-5 CXX=g++-5"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-5
- name: "gcc 4.9"
dist: xenial
env: MATRIX_ENV="CC=gcc-4.9 CXX=g++-4.9"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-4.9
before_install:
- eval "${MATRIX_ENV}"
before_script:
- ./autogen.sh
- ./configure CPPFLAGS=-Werror
script:
- make -k
- ./numactl --show
- make -k check VERBOSE=1 TESTS='test/distance test/nodemap test/numademo test/tbitmap'
- make distcheck LOG_COMPILER='sh -c "exit 77"'
07070100000002000081A400003EA60000003200000001612720430000074D000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/INSTALL.md## Building `numactl`
TL;DR:
```shell
$ ./autogen.sh
$ ./configure
$ make
# make install
```
Start by configuring the build by running the `configure` script:
```shell
$ ./configure
```
You can pass options to configure to define build options, to pass it
compiler paths, compiler flags and to define the installation layout. Run
`./configure --help` for more details on how to customize the build.
Once configuration is completed, build `numactl` with:
```shell
$ make
```
If you would like to increase verbosity by printing the full build command
lines, pass `make` the `V=1` parameter:
```shell
$ make V=1
```
You can build and run the tests included with numactl with the following
command:
```shell
$ make check
```
The results will be saved in `test/*.log` files and a `test-suite.log` will be
generated with the summary of test passes and failures.
Install numactl to the system by running the following command as root:
```shell
# make install
```
You can also install it to a staging directory, in which case it is not
required to be root while running the install steps. Just pass a DESTDIR
variable while running `make install` with the path to the staging
directory.
```shell
$ make install DESTDIR=/path/to/staging/numactl
```
## Using a snapshot from the Git repository
First, the build system files need to be generated using the `./autogen.sh`
script, which calls `autoreconf` with the appropriate options to generate the
configure script and the templates for `Makefile`, `config.h`, etc.
Once those files are generated, follow the normal steps to configure and
build numactl.
In order to create a distribution tarball, use `make dist` from a configured
build tree. Use `make distcheck` to build a distribution tarball and confirm
that rebuilding from that archive works as expected, that out-of-tree builds
work, and that the test cases pass.
07070100000003000081A400003EA600000032000000016127204300004643000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/LICENSE.GPL2 GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
07070100000004000081A400003EA6000000320000000161272043000067A2000000000000000000000000000000000000002B00000000numactl-2.0.14.20.g4ee5e0c/LICENSE.LGPL2.1 GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method: (1) we copyright the
library, and (2) we offer you this license, which gives you legal
permission to copy, distribute and/or modify the library.
To protect each distributor, we want to make it very clear that
there is no warranty for the free library. Also, if the library is
modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
restrictive license from a patent holder. Therefore, we insist that
any patent license obtained for a version of the library must be
consistent with the full freedom of use specified in this license.
Most GNU software, including some libraries, is covered by the
ordinary GNU General Public License. This license, the GNU Lesser
General Public License, applies to certain designated libraries, and
is quite different from the ordinary General Public License. We use
this license for certain libraries in order to permit linking those
libraries into non-free programs.
When a program is linked with a library, whether statically or using
a shared library, the combination of the two is legally speaking a
combined work, a derivative of the original library. The ordinary
General Public License therefore permits such linking only if the
entire combination fits its criteria of freedom. The Lesser General
Public License permits more lax criteria for linking other code with
the library.
We call this license the "Lesser" General Public License because it
does Less to protect the user's freedom than the ordinary General
Public License. It also provides other free software developers Less
of an advantage over competing non-free programs. These disadvantages
are the reason we use the ordinary General Public License for many
libraries. However, the Lesser license provides advantages in certain
special circumstances.
For example, on rare occasions, there may be a special need to
encourage the widest possible use of a certain library, so that it becomes
a de-facto standard. To achieve this, non-free programs must be
allowed to use the library. A more frequent case is that a free
library does the same job as widely used non-free libraries. In this
case, there is little to gain by limiting the free library to free
software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free
programs enables a greater number of people to use a large body of
free software. For example, permission to use the GNU C Library in
non-free programs enables many more people to use the whole GNU
operating system, as well as its variant, the GNU/Linux operating
system.
Although the Lesser General Public License is Less protective of the
users' freedom, it does ensure that the user of a program that is
linked with the Library has the freedom and the wherewithal to run
that program using a modified version of the Library.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other
program which contains a notice placed by the copyright holder or
other authorized party saying it may be distributed under the terms of
this Lesser General Public License (also called "this License").
Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (1) uses at run time a
copy of the library already present on the user's computer system,
rather than copying library functions into the executable, and (2)
will operate properly with a modified version of the library, if
the user installs one, as long as the modified version is
interface-compatible with the version that the work was made with.
c) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
e) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the materials to be distributed need not include anything that is
normally distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Lesser General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
possible use to the public, we recommend making it free software that
everyone can redistribute and change. You can do so by permitting
redistribution under these terms (or, alternatively, under the terms of the
ordinary General Public License).
To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
<one line to give the library's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Also add information on how to contact you by electronic and paper mail.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
<signature of Ty Coon>, 1 April 1990
Ty Coon, President of Vice
That's all there is to it!
07070100000005000081A400003EA600000032000000016127204300000F84000000000000000000000000000000000000002700000000numactl-2.0.14.20.g4ee5e0c/Makefile.am
ACLOCAL_AMFLAGS = -I m4
CLEANFILES =
AM_CPPFLAGS = -Wall
bin_PROGRAMS = numactl numastat numademo migratepages migspeed memhog
lib_LTLIBRARIES = libnuma.la
include_HEADERS = numa.h numacompat1.h numaif.h
noinst_HEADERS = numaint.h util.h
dist_man_MANS = move_pages.2 numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8 memhog.8
EXTRA_DIST = README.md INSTALL.md LICENSE.GPL2 LICENSE.LGPL2.1
numactl_SOURCES = numactl.c util.c shm.c shm.h
numactl_LDADD = libnuma.la
numastat_SOURCES = numastat.c
numastat_CFLAGS = $(AM_CFLAGS) -std=gnu99
numademo_SOURCES = numademo.c stream_lib.c stream_lib.h mt.c mt.h clearcache.c clearcache.h
numademo_CPPFLAGS = $(AM_CPPFLAGS) -DHAVE_STREAM_LIB -DHAVE_MT -DHAVE_CLEAR_CACHE
numademo_CFLAGS = $(AM_CFLAGS) -O3 -ffast-math -funroll-loops
if HAVE_TREE_VECTORIZE
numademo_CFLAGS += -ftree-vectorize
endif
numademo_LDADD = libnuma.la -lm
migratepages_SOURCES = migratepages.c util.c
migratepages_LDADD = libnuma.la
migspeed_SOURCES = migspeed.c util.c
migspeed_LDADD = libnuma.la -lrt
memhog_SOURCES = memhog.c util.c
memhog_LDADD = libnuma.la
libnuma_la_SOURCES = libnuma.c syscall.c distance.c affinity.c affinity.h sysfs.c sysfs.h rtnetlink.c rtnetlink.h versions.ldscript
libnuma_la_LDFLAGS = -version-info 1:0:0 -Wl,--version-script,$(srcdir)/versions.ldscript -Wl,-init,numa_init -Wl,-fini,numa_fini
check_PROGRAMS = \
test/distance \
test/ftok \
test/mbind_mig_pages \
test/migrate_pages \
test/move_pages \
test/mynode \
test/node-parse \
test/nodemap \
test/pagesize \
test/prefered \
test/randmap \
test/realloc_test \
test/tbitmap \
test/tshared
EXTRA_DIST += \
test/README \
test/bind_range \
test/checkaffinity \
test/checktopology \
test/numademo \
test/printcpu \
test/regress \
test/regress2 \
test/regress-io \
test/runltp \
test/shmtest
test_distance_SOURCES = test/distance.c
test_distance_LDADD = libnuma.la
test_ftok_SOURCES = test/ftok.c
test_ftok_LDADD = libnuma.la
test_mbind_mig_pages_SOURCES = test/mbind_mig_pages.c
test_mbind_mig_pages_LDADD = libnuma.la
test_migrate_pages_SOURCES = test/migrate_pages.c
test_migrate_pages_LDADD = libnuma.la
test_move_pages_SOURCES = test/move_pages.c
test_move_pages_LDADD = libnuma.la
test_mynode_SOURCES = test/mynode.c
test_mynode_LDADD = libnuma.la
test_node_parse_SOURCES = test/node-parse.c util.c
test_node_parse_LDADD = libnuma.la
test_nodemap_SOURCES = test/nodemap.c
test_nodemap_LDADD = libnuma.la
test_pagesize_SOURCES = test/pagesize.c
test_pagesize_LDADD = libnuma.la
test_prefered_SOURCES = test/prefered.c
test_prefered_LDADD = libnuma.la
test_randmap_SOURCES = test/randmap.c
test_randmap_LDADD = libnuma.la
test_realloc_test_SOURCES = test/realloc_test.c
test_realloc_test_LDADD = libnuma.la
test_tbitmap_SOURCES = test/tbitmap.c util.c
test_tbitmap_LDADD = libnuma.la
test_tshared_SOURCES = test/tshared.c
test_tshared_LDADD = libnuma.la
# Legacy make rules for test cases.
# These will be superseded by "make check".
regress1: $(check_PROGRAMS)
cd test && ./regress
regress2: $(check_PROGRAMS)
cd test && ./regress2
test_numademo: numademo
./numademo -t -e 10M
test: all $(check_PROGRAMS) regress1 regress2 test_numademo
TESTS_ENVIRONMENT = builddir='$(builddir)'; export builddir;
TESTS = \
test/bind_range \
test/checkaffinity \
test/checktopology \
test/distance \
test/move_pages \
test/nodemap \
test/numademo \
test/regress \
test/tbitmap
# These are known to be broken:
# test/prefered
# test/randmap
SED_PROCESS = \
$(AM_V_GEN)$(SED) \
-e 's,@VERSION\@,$(VERSION),g' \
-e 's,@prefix\@,$(prefix),g' \
-e 's,@exec_prefix\@,$(exec_prefix),g' \
-e 's,@libdir\@,$(libdir),g' \
-e 's,@includedir\@,$(includedir),g' \
-e 's,@LIBS\@,$(LIBS),g' \
< $< > $@ || rm $@
%.pc: %.pc.in Makefile
$(SED_PROCESS)
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = numa.pc
EXTRA_DIST += numa.pc.in
CLEANFILES += numa.pc
07070100000006000081A400003EA6000000320000000161272043000006E6000000000000000000000000000000000000002500000000numactl-2.0.14.20.g4ee5e0c/README.md# numactl
[![Build Status](https://travis-ci.org/numactl/numactl.svg?branch=master)](https://travis-ci.org/numactl/numactl)
Simple NUMA policy support. It consists of a numactl program to run other
programs with a specific NUMA policy and a libnuma shared library ("NUMA API")
to set NUMA policy in applications.
The libnuma binary interface is supposed to stay binary compatible.
Incompatible changes will use new symbol version numbers.
In addition there are various test and utility programs, like `numastat` to
display NUMA allocation statistics and `memhog`.
In `test/` there is a small regression test suite.
Note that `regress` assumes an unloaded machine with memory free on each node.
Otherwise you will get spurious failures in the non-strict policies (preferred,
interleave.)
See the manpages [`numactl.8`](https://linux.die.net/man/8/numactl) and
[`numa.3`](https://linux.die.net/man/3/numa) for details.
# License, Copyrights, Acknowledgements
`numactl` and the demo programs are under the GNU General Public License, v.2.
`libnuma` is under the GNU Lesser General Public License, v2.1.
The manpages are under the same license as the Linux manpages (see the files.)
`numademo` links with a library derived from the C version of STREAM by John D.
McCalpin and Joe R. Zagar for one sub benchmark. See `stream_lib.c` for the
license. In particular when you publish `numademo` output you might need to pay
attention there or filter out the STREAM results.
It also uses a public domain Mersenne Twister implementation from Michael
Brundage.
Version 2.0.10-rc2: (C)2014 SGI
Author:
Andi Kleen, SUSE Labs
Version 2.0.0 by Cliff Wickman (`cpw@sgi.com`), Christoph Lameter
(`clameter@sgi.com`) and Lee Schermerhorn (`lee.schermerhorn@hp.com`).
07070100000007000081A400003EA6000000320000000161272043000020C4000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/affinity.c/* Support for specifying IO affinity by various means.
Copyright 2010 Intel Corporation
Author: Andi Kleen
libnuma is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; version
2.1.
libnuma is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should find a copy of v2.1 of the GNU Lesser General Public License
somewhere on your Linux system; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* Notebook:
- Separate real errors from no NUMA with fallback
- Infiniband
- FCoE?
- Support for other special IO devices
- Specifying cpu subsets inside the IO node?
- Handle multiple IO nodes (needs kernel changes)
- Better support for multi-path IO?
*/
#define _GNU_SOURCE 1
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
#include <netdb.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <dirent.h>
#include <linux/rtnetlink.h>
#include <linux/netlink.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <ctype.h>
#include <assert.h>
#include <regex.h>
#include <sys/sysmacros.h>
#include "numa.h"
#include "numaint.h"
#include "sysfs.h"
#include "affinity.h"
#include "rtnetlink.h"
/* Return 1 when s contains a '/' or a '.', 0 otherwise. */
static int badchar(const char *s)
{
	return strpbrk(s, "/.") != NULL;
}
static int node_parse_failure(int ret, char *cls, const char *dev)
{
if (!cls)
cls = "";
if (ret == -2)
numa_warn(W_node_parse1,
"Kernel does not know node mask for%s%s device `%s'",
*cls ? " " : "", cls, dev);
else
numa_warn(W_node_parse2,
"Cannot read node mask for %s device `%s'",
cls, dev);
return -1;
}
/* Generic sysfs class lookup.
   Reads the node mask of device dev in sysfs class cls into mask.
   Leading white space in dev is skipped; a name containing '/' or '.'
   is rejected with a warning.
   Returns the sysfs_node_read() result when the PCI device path could be
   extracted from the class symlink, 0 when the generic
   /sys/class/<cls>/<dev>/device/numa_node lookup succeeded, and -1 (after
   a warning) on failure. */
static int
affinity_class(struct bitmask *mask, char *cls, const char *dev)
{
	int ret;
	ssize_t len = -1;
	char path[1024];
	char *fn = NULL;

	/* Cast: passing a negative char to isspace() is undefined. */
	while (isspace((unsigned char)*dev))
		dev++;
	if (badchar(dev)) {
		numa_warn(W_badchar, "Illegal characters in `%s' specification",
			  dev);
		return -1;
	}

	/* Somewhat hackish: extract device from symlink path.
	   Better would be a direct backlink. This knows slightly too
	   much about the actual sysfs layout. */
	if (asprintf(&fn, "/sys/class/%s/%s", cls, dev) < 0)
		fn = NULL;	/* contents are undefined after a failure */
	if (fn != NULL) {
		/* Leave room for the terminator readlink() does not write. */
		len = readlink(fn, path, sizeof(path) - 1);
		free(fn);
	}

	if (len > 0) {
		regex_t re;
		regmatch_t match[2];

		path[len] = 0;
		if (regcomp(&re, "(/devices/pci[0-9a-fA-F:/]+\\.[0-9]+)/",
			    REG_EXTENDED) == 0) {
			ret = regexec(&re, path, 2, match, 0);
			regfree(&re);
			if (ret == 0) {
				char *p;

				assert(match[0].rm_so > 0);
				assert(match[0].rm_eo > 0);
				/* Cut right behind the '/' that follows the
				   captured device path. */
				path[match[1].rm_eo + 1] = 0;
				p = path + match[0].rm_so;
				ret = sysfs_node_read(mask, "/sys/%s/numa_node", p);
				if (ret < 0)
					return node_parse_failure(ret, NULL, p);
				return ret;
			}
		}
	}

	/* Fall back to the device link inside the class directory. */
	ret = sysfs_node_read(mask, "/sys/class/%s/%s/device/numa_node",
			      cls, dev);
	if (ret < 0)
		return node_parse_failure(ret, cls, dev);
	return 0;
}
/* Turn file (or device node) into class name.
   Stats file, picks the sysfs class ("misc" for character devices, "block"
   otherwise) and the device number (st_rdev for device nodes, st_dev for
   everything else), then scans /sys/class/<class> for the entry whose "dev"
   attribute carries that major:minor pair and resolves its node mask via
   affinity_class().
   The incoming cls argument is ignored.
   Returns the affinity_class() result for the matching entry, or -1 after a
   warning when the file cannot be examined or no entry matches. */
static int affinity_file(struct bitmask *mask, char *cls, const char *file)
{
	struct stat st;
	DIR *dir;
	dev_t d;
	struct dirent *dep;
	/* "block" is the longest class name used below. */
	char fn[sizeof("/sys/class/block")];

	cls = "block";
	if (stat(file, &st) < 0) {
		numa_warn(W_blockdev1, "Cannot stat file %s", file);
		return -1;
	}
	d = st.st_dev;
	if (S_ISCHR(st.st_mode)) {
		/* Better choice than misc? Most likely misc will not work
		   anyways unless the kernel is fixed. */
		cls = "misc";
		d = st.st_rdev;
	} else if (S_ISBLK(st.st_mode))
		d = st.st_rdev;

	snprintf(fn, sizeof fn, "/sys/class/%s", cls);
	dir = opendir(fn);
	if (!dir) {
		numa_warn(W_blockdev2, "Cannot enumerate %s devices in sysfs",
			  cls);
		return -1;
	}
	while ((dep = readdir(dir)) != NULL) {
		char *name = dep->d_name;
		unsigned maj = 0, min = 0;
		int n = -1;
		int ret;
		char *dev;

		if (*name == '.')
			continue;

		/* Look inside the class that is being enumerated; the "dev"
		   attribute of an entry lives below that same directory. */
		char fn2[sizeof("/sys/class///dev") + strlen(cls) + strlen(name)];
		if (sprintf(fn2, "/sys/class/%s/%s/dev", cls, name) < 0)
			break;
		dev = sysfs_read(fn2);
		if (dev) {
			n = sscanf(dev, "%u:%u", &maj, &min);
			free(dev);
		}
		if (n != 2) {
			numa_warn(W_blockdev3, "Cannot parse sysfs device %s",
				  name);
			continue;
		}
		if (major(d) != maj || minor(d) != min)
			continue;
		ret = affinity_class(mask, cls, name);
		closedir(dir);
		return ret;
	}
	closedir(dir);
	/* Report the device that was searched for, not the last entry read. */
	numa_warn(W_blockdev5, "Cannot find block device %x:%x in sysfs for `%s'",
		  (unsigned)major(d), (unsigned)minor(d), file);
	return -1;
}
/* Look up interface of route using rtnetlink.
   Sends an RTM_GETROUTE request for dst and stores the value of the
   RTA_OIF attribute of the reply in *iifp.
   Returns 0 on success, -1 after a warning otherwise. */
static int find_route(struct sockaddr *dst, int *iifp)
{
	const int hdrlen = NLMSG_LENGTH(sizeof(struct rtmsg));
	struct {
		struct nlmsghdr msg;
		struct rtmsg rt;
		char buf[256];
	} req = {
		.msg = {
			.nlmsg_len = hdrlen,
			.nlmsg_type = RTM_GETROUTE,
			.nlmsg_flags = NLM_F_REQUEST,
		},
		.rt = {
			.rtm_family = dst->sa_family,
		},
	};
	struct sockaddr_nl adr = {
		.nl_family = AF_NETLINK,
	};
	struct rtattr *attr;

	if (rta_put_address(&req.msg, RTA_DST, dst) < 0) {
		numa_warn(W_netlink1, "Cannot handle network family %x",
			  dst->sa_family);
		return -1;
	}

	if (rtnetlink_request(&req.msg, sizeof req, &adr) < 0) {
		numa_warn(W_netlink2, "Cannot request rtnetlink route: %s",
			  strerror(errno));
		return -1;
	}

	/* Fish the interface out of the netlink soup. */
	for (attr = rta_get(&req.msg, NULL, hdrlen);
	     attr != NULL;
	     attr = rta_get(&req.msg, attr, hdrlen)) {
		if (attr->rta_type != RTA_OIF)
			continue;
		memcpy(iifp, RTA_DATA(attr), sizeof(int));
		return 0;
	}

	numa_warn(W_netlink3, "rtnetlink query did not return interface");
	return -1;
}
/* Fill ifr with the name of the interface whose index is iif.
   Stores iif in ifr->ifr_ifindex and issues SIOCGIFNAME on a temporary
   datagram socket.
   Returns the ioctl() result, or -1 when the socket cannot be created. */
static int iif_to_name(int iif, struct ifreq *ifr)
{
	int status;
	int fd = socket(PF_INET, SOCK_DGRAM, 0);

	if (fd >= 0) {
		ifr->ifr_ifindex = iif;
		status = ioctl(fd, SIOCGIFNAME, ifr);
		close(fd);
		return status;
	}
	return -1;
}
/* Resolve an IP address to the nodes of a network device.
   This generally only attempts to handle simple cases:
   no multi-path, no bonding etc. In these cases only
   the first interface or none is chosen.
   id is resolved with getaddrinfo(); the first returned address is routed
   to an interface index, the index is turned into an interface name and
   that name is looked up in the "net" class. The cls argument is unused.
   Returns the affinity_class() result, or -1 on any failure. */
static int affinity_ip(struct bitmask *mask, char *cls, const char *id)
{
	struct addrinfo *res;
	struct ifreq ifr;
	int ifindex;
	int have_name = 0;
	int gai = getaddrinfo(id, NULL, NULL, &res);

	if (gai != 0) {
		numa_warn(W_net1, "Cannot resolve %s: %s",
			  id, gai_strerror(gai));
		return -1;
	}

	if (find_route(&res->ai_addr[0], &ifindex) >= 0) {
		if (iif_to_name(ifindex, &ifr) < 0)
			numa_warn(W_net2, "Cannot resolve network interface %d", ifindex);
		else
			have_name = 1;
	}

	/* The address list is no longer needed on either path. */
	freeaddrinfo(res);
	if (!have_name)
		return -1;
	return affinity_class(mask, "net", ifr.ifr_name);
}
/* Look up affinity for a PCI device.
   id is parsed as hexadecimal "seg:bus:dev.func"; the function and the
   segment may each be left out and then default to 0.
   Returns 0 when the node mask was read, -1 after a warning otherwise. */
static int affinity_pci(struct bitmask *mask, char *cls, const char *id)
{
	unsigned seg, bus, dev, func;
	int fields, ret;

	fields = sscanf(id, "%x:%x:%x.%x", &seg, &bus, &dev, &func);
	if (fields == 4 || fields == 3) {
		/* Func is optional. */
		if (fields == 3)
			func = 0;
	} else {
		/* Segment is optional too */
		fields = sscanf(id, "%x:%x.%x", &bus, &dev, &func);
		if (fields != 3 && fields != 2) {
			numa_warn(W_pci1, "Cannot parse PCI device `%s'", id);
			return -1;
		}
		seg = 0;
		if (fields == 2)
			func = 0;
	}

	ret = sysfs_node_read(mask,
			      "/sys/devices/pci%04x:%02x/%04x:%02x:%02x.%x/numa_node",
			      seg, bus, seg, bus, dev, func);
	if (ret < 0)
		return node_parse_failure(ret, cls, id);
	return 0;
}
/* Table of affinity specifier prefixes and the function that resolves each.
   resolve_affinity() walks it until it reaches the entry whose first field
   is 0, which is the empty initializer at the end. */
static struct handler {
char first; /* first character of name; checked before the full prefix */
char *name; /* specifier prefix, including the ':' */
char *cls; /* class argument passed to the handler, NULL where unused */
int (*handler)(struct bitmask *mask, char *cls, const char *desc);
} handlers[] = {
{ 'n', "netdev:", "net", affinity_class },
{ 'i', "ip:", NULL, affinity_ip },
{ 'f', "file:", NULL, affinity_file },
{ 'b', "block:", "block", affinity_class },
{ 'p', "pci:", NULL, affinity_pci },
{} /* terminator: first == 0 */
};
/* Resolve an affinity specifier such as "netdev:eth0" into a node mask.
   The handler whose prefix matches the start of id is called with the text
   that follows the prefix. A handler result of -2 additionally triggers a
   warning.
   Returns the handler's result, or NO_IO_AFFINITY when no prefix matches. */
hidden int resolve_affinity(const char *id, struct bitmask *mask)
{
	struct handler *h;

	for (h = handlers; h->first; h++) {
		const char *prefix;
		size_t plen;

		/* Cheap first-character test before the full comparison. */
		if (id[0] != h->first)
			continue;

		prefix = h->name;
		plen = strlen(prefix);
		if (strncmp(id, prefix, plen) == 0) {
			const char *spec = id + plen;
			int ret = h->handler(mask, h->cls, spec);

			if (ret == -2)
				numa_warn(W_nonode, "Kernel does not know node for %s\n",
					  spec);
			return ret;
		}
	}
	return NO_IO_AFFINITY;
}
07070100000008000081A400003EA60000003200000001612720430000005C000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/affinity.henum {
NO_IO_AFFINITY = -2
};
int resolve_affinity(const char *id, struct bitmask *mask);
07070100000009000081ED00003EA600000032000000016127204300000032000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/autogen.sh#!/bin/sh
set -e
autoreconf --install --symlink
0707010000000A000081A400003EA600000032000000016127204300000791000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/clearcache.c/* Clear the CPU cache for benchmark purposes. Pretty simple minded.
* Might not work in some complex cache topologies.
* When you switch CPUs it's a good idea to clear the cache after testing
* too.
*/
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include "clearcache.h"
/* Add one sysconf() cache size to a running total. Results that are not
   positive (0 for an absent level, -1 for an unsupported query) are
   ignored so they cannot wrap the unsigned total. */
static unsigned add_cache_level(unsigned total, long level_size)
{
	if (level_size > 0)
		total += (unsigned)level_size;
	return total;
}

/* Return twice the summed size in bytes of the data caches reported by
   sysconf() (L1 data, L2, L3, L4, as far as the _SC_ names exist).
   When no size can be determined a warning is printed once and 64MB is
   assumed before doubling. */
unsigned cache_size(void)
{
	unsigned cs = 0;
#ifdef _SC_LEVEL1_DCACHE_SIZE
	cs = add_cache_level(cs, sysconf(_SC_LEVEL1_DCACHE_SIZE));
#endif
#ifdef _SC_LEVEL2_CACHE_SIZE
	cs = add_cache_level(cs, sysconf(_SC_LEVEL2_CACHE_SIZE));
#endif
#ifdef _SC_LEVEL3_CACHE_SIZE
	cs = add_cache_level(cs, sysconf(_SC_LEVEL3_CACHE_SIZE));
#endif
#ifdef _SC_LEVEL4_CACHE_SIZE
	cs = add_cache_level(cs, sysconf(_SC_LEVEL4_CACHE_SIZE));
#endif
	if (cs == 0) {
		static int warned;
		if (!warned) {
			printf("Cannot determine CPU cache size\n");
			warned = 1;
		}
		cs = 64*1024*1024;
	}
	cs *= 2; /* safety factor */
	return cs;
}
/* Portable cache clearing: write one byte every 32 bytes across a buffer
   of cache_size() bytes. The buffer is allocated on first use and kept in
   a static pointer for later calls. Prints a warning and does nothing
   when the buffer cannot be allocated. */
void fallback_clearcache(void)
{
	static unsigned char *clearmem;
	const unsigned bytes = cache_size();
	unsigned off;

	if (clearmem == NULL) {
		clearmem = malloc(bytes);
		if (clearmem == NULL) {
			printf("Warning: cannot allocate %u bytes of clear cache buffer\n", bytes);
			return;
		}
	}

	for (off = 0; off < bytes; off += 32)
		clearmem[off] = 1;
}
/* Evict the size bytes starting at mem from the CPU caches.
   x86: issues clflush once per cache line when CPUID reports the
   instruction and a non-zero line size; otherwise falls back to
   fallback_clearcache().
   ia64: issues "fc" every 64 bytes over the range.
   Other architectures: always use fallback_clearcache(), which ignores
   mem and size. */
void clearcache(unsigned char *mem, unsigned size)
{
#if defined(__i386__) || defined(__x86_64__)
	unsigned i, cl, eax, feat;

	/* get clflush unit and feature */
	asm("cpuid" : "=a" (eax), "=b" (cl), "=d" (feat) : "0" (1) : "cx");
	/* EBX bits 15:8 hold the clflush line size in units of 8 bytes. */
	cl = ((cl >> 8) & 0xff) * 8;
	/* Without CLFSH (EDX bit 19) the instruction must not be executed,
	   and a zero line size would never advance the loop below. */
	if (!(feat & (1 << 19)) || cl == 0) {
		fallback_clearcache();
		return;
	}
	for (i = 0; i < size; i += cl)
		asm("clflush %0" :: "m" (mem[i]));
#elif defined(__ia64__)
	unsigned long cl, endcl;

	// flush probable 128 byte cache lines (but possibly 64 bytes)
	cl = (unsigned long)mem;
	endcl = (unsigned long)(mem + (size-1));
	for (; cl <= endcl; cl += 64)
		asm ("fc %0" :: "r"(cl) : "memory" );
#else
#warning "Consider adding a clearcache implementation for your architecture"
	fallback_clearcache();
#endif
}
0707010000000B000081A400003EA600000032000000016127204300000034000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/clearcache.hvoid clearcache(unsigned char *mem, unsigned size);
0707010000000C000081A400003EA60000003200000001612720430000042C000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/configure.acAC_PREREQ([2.64])
AC_INIT([numactl], [2.0.14])
AC_CONFIG_SRCDIR([numactl.c])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_HEADERS([config.h])
AM_INIT_AUTOMAKE([foreign 1.11 silent-rules subdir-objects parallel-tests])
AM_SILENT_RULES([yes])
LT_PREREQ([2.2])
LT_INIT
AC_PROG_CC
# Override CFLAGS so that we can specify custom CFLAGS for numademo.
AX_AM_OVERRIDE_VAR([CFLAGS])
AX_TLS([:],[:])
AX_CHECK_COMPILE_FLAG([-ftree-vectorize], [tree_vectorize="true"])
AM_CONDITIONAL([HAVE_TREE_VECTORIZE], [test x"${tree_vectorize}" = x"true"])
AC_CONFIG_FILES([Makefile])
AC_SEARCH_LIBS([__atomic_fetch_and_1], [atomic])
# GCC tries to be "helpful" and only issues a warning for unrecognized
# attributes. So we compile the test with -Werror, so that compilation
# fails if the attribute is not recognized.
AC_LANG(C)
AC_LANG_WERROR
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[__attribute__ ((symver ("foo@foo_1"))) void frob (void) { }]])],
[AC_DEFINE([HAVE_ATTRIBUTE_SYMVER], [1], [Checking for symver attribute])], [])
AC_OUTPUT
0707010000000D000081A400003EA600000032000000016127204300000B52000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/distance.c/* Discover distances
Copyright (C) 2005 Andi Kleen, SuSE Labs.
libnuma is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; version
2.1.
libnuma is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should find a copy of v2.1 of the GNU Lesser General Public License
somewhere on your Linux system; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
All calls are undefined when numa_available returns an error. */
#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include "numa.h"
#include "numaint.h"
/* Row/column count of distance_table; set together with it by read_distance_table(). */
static int distance_numnodes;
/* Lazily built distance_numnodes x distance_numnodes matrix, NULL until first read. */
static int *distance_table;
/*
 * Parse the numbers in one distance line into a row of the distance table.
 *
 * s    - text holding the distances, parsed with strtoul()
 * iptr - first slot of the destination row; the caller provides
 *        numa_max_node() + 1 slots
 *
 * The k-th number found is stored in the slot of the k-th node whose bit is
 * set in numa_nodes_ptr; slots of node ids that are not set are left
 * untouched. Parsing stops at the first token that is not a number, or when
 * no set node is left to receive a value.
 */
static void parse_numbers(char *s, int *iptr)
{
	int i, d, j;
	char *end;
	int maxnode = numa_max_node();

	for (i = 0, j = 0; i <= maxnode; i++, j++) {
		d = strtoul(s, &end, 0);
		/* Skip unavailable nodes */
		while (j <= maxnode && !numa_bitmask_isbitset(numa_nodes_ptr, j))
			j++;
		if (s == end)
			break;
		/* more numbers than existing nodes: never write past the row */
		if (j > maxnode)
			break;
		iptr[j] = d;
		s = end;
	}
}
/*
 * Build the node distance matrix from
 * /sys/devices/system/node/node<N>/distance and publish it in
 * distance_table / distance_numnodes.
 *
 * Returns 0 on success (also when another caller already published a
 * table) and -1, after a W_distance warning, when the files cannot be read.
 */
static int read_distance_table(void)
{
	int nd, len;
	char *line = NULL;
	size_t linelen = 0;
	int maxnode = numa_max_node() + 1;
	int *table = NULL;
	int err = -1;

	for (nd = 0;; nd++) {
		char fn[100];
		FILE *dfh;

		snprintf(fn, sizeof(fn),
			 "/sys/devices/system/node/node%d/distance", nd);
		dfh = fopen(fn, "r");
		if (!dfh) {
			/* a missing node directory is not an error: ids may be sparse */
			if (errno == ENOENT)
				err = 0;
			if (!err && nd < maxnode)
				continue;
			else
				break;
		}
		if (nd >= maxnode) {
			/*
			 * A node beyond numa_max_node() exists (e.g. it appeared
			 * after the library sized itself). The table has no row
			 * for it, so stop here instead of writing out of bounds.
			 */
			fclose(dfh);
			err = 0;
			break;
		}
		len = getdelim(&line, &linelen, '\n', dfh);
		fclose(dfh);
		if (len <= 0)
			break;

		if (!table) {
			table = calloc(maxnode * maxnode, sizeof(int));
			if (!table) {
				errno = ENOMEM;
				break;
			}
		}

		parse_numbers(line, table + nd * maxnode);
	}
	free(line);
	if (err) {
		numa_warn(W_distance,
			  "Cannot parse distance information in sysfs: %s",
			  strerror(errno));
		free(table);
		return err;
	}
	/* Update the global table pointer. Race window here with
	   other threads, but in the worst case we leak one distance
	   array one time, which is tolerable. This avoids a
	   dependency on pthreads. */
	if (distance_table) {
		free(table);
		return 0;
	}
	distance_numnodes = maxnode;
	distance_table = table;
	return 0;
}
/*
 * Return the distance between nodes a and b as recorded in the distance
 * table, loading the table on first use. Returns 0 when the table cannot
 * be read or when either node lies outside the table.
 */
int numa_distance(int a, int b)
{
	if (distance_table == NULL) {
		if (read_distance_table() < 0 || distance_table == NULL)
			return 0;
	}
	/* the unsigned casts make negative node ids fail the range check */
	if ((unsigned)a >= distance_numnodes)
		return 0;
	if ((unsigned)b >= distance_numnodes)
		return 0;
	return distance_table[a * distance_numnodes + b];
}
0707010000000E000081A400003EA60000003200000001612720430000BD7D000000000000000000000000000000000000002500000000numactl-2.0.14.20.g4ee5e0c/libnuma.c/* Simple NUMA library.
Copyright (C) 2003,2004,2005,2008 Andi Kleen,SuSE Labs and
Cliff Wickman,SGI.
libnuma is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; version
2.1.
libnuma is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should find a copy of v2.1 of the GNU Lesser General Public License
somewhere on your Linux system; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
All calls are undefined when numa_available returns an error. */
#define _GNU_SOURCE 1
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sched.h>
#include <dirent.h>
#include <errno.h>
#include <stdarg.h>
#include <ctype.h>
#include <sys/mman.h>
#include <limits.h>
#include "config.h"
#include "numa.h"
#include "numaif.h"
#include "numaint.h"
#include "util.h"
#include "affinity.h"
/* Marks numa_error()/numa_warn() as weak so an application can override them. */
#define WEAK __attribute__((weak))
#define CPU_BUFFER_SIZE 4096 /* This limits you to 32768 CPUs */
/* these are the old (version 1) masks */
nodemask_t numa_no_nodes;
nodemask_t numa_all_nodes;
/* these are now the default bitmask (pointers to) (version 2) */
/* All of them are allocated in set_task_constraints() and released in numa_fini(). */
struct bitmask *numa_no_nodes_ptr = NULL;
struct bitmask *numa_all_nodes_ptr = NULL;
struct bitmask *numa_possible_nodes_ptr = NULL;
struct bitmask *numa_all_cpus_ptr = NULL;
struct bitmask *numa_possible_cpus_ptr = NULL;
/* I would prefer to use symbol versioning to create v1 and v2 versions
of numa_no_nodes and numa_all_nodes, but the loader does not correctly
handle versioning of BSS versus small data items */
/* Nodes found under /sys (numa_nodes_ptr) and the subset that reports memory. */
struct bitmask *numa_nodes_ptr = NULL;
static struct bitmask *numa_memnode_ptr = NULL;
/* Per-node cpu mask caches for the v1 and v2 numa_node_to_cpus() entry points;
   the *_stale flags request a refresh on the next lookup. */
static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
static char node_cpu_mask_v1_stale = 1;
static struct bitmask **node_cpu_mask_v2;
static char node_cpu_mask_v2_stale = 1;
WEAK void numa_error(char *where);
/* Without TLS support the per-thread settings below become plain statics. */
#ifndef TLS
#warning "not threadsafe"
#define __thread
#endif
/* Policy used by the bind-style helpers; switched by numa_set_bind_policy(). */
static __thread int bind_policy = MPOL_BIND;
/* Flags passed to every mbind() call made through dombind(). */
static __thread unsigned int mbind_flags = 0;
/* Non-zero once set_sizes() has run. */
static int sizes_set=0;
/* Highest node / cpu id found, -1 until probed. */
static int maxconfigurednode = -1;
static int maxconfiguredcpu = -1;
/* Number of nodes / cpus the task may use, -1 until probed. */
static int numprocnode = -1;
static int numproccpu = -1;
/* Sizes, in bits, of the kernel nodemask and cpumask. */
static int nodemask_sz = 0;
static int cpumask_sz = 0;
/* When non-zero, numa_error() terminates the process after reporting. */
int numa_exit_on_error = 0;
/* NOTE(review): not read anywhere in this part of the file - confirm usage. */
int numa_exit_on_warn = 0;
static void set_sizes(void);
/*
* There are two special functions, _init(void) and _fini(void), which
* are called automatically by the dynamic loader whenever a library is loaded.
*
* The v1 library depends upon nodemask_t's of all nodes and no nodes.
*/
/*
 * Library constructor: probe the system once and fill the v1 compatibility
 * masks numa_all_nodes / numa_no_nodes.
 */
void __attribute__((constructor))
numa_init(void)
{
	int node, nconfigured;

	/* the probing below must only ever run once */
	if (sizes_set)
		return;

	set_sizes();

	/* numa_all_nodes should represent existing nodes on this system */
	nconfigured = numa_num_configured_nodes();
	node = 0;
	while (node < nconfigured) {
		nodemask_set_compat((nodemask_t *)&numa_all_nodes, node);
		node++;
	}
	memset(&numa_no_nodes, 0, sizeof(numa_no_nodes));
}
static void cleanup_node_cpu_mask_v2(void);
/*
 * Free the bitmask held in lvalue x (if any) and reset x to NULL.
 * Wrapped in do/while(0) so the macro expands to a single statement and is
 * safe in unbraced if/else bodies; x is evaluated more than once.
 */
#define FREE_AND_ZERO(x) do { \
		if (x) { \
			numa_bitmask_free(x); \
			(x) = NULL; \
		} \
	} while (0)
/*
 * Library destructor: release every global bitmask and the cached
 * per-node cpu masks of the v2 interface.
 */
void __attribute__((destructor))
numa_fini(void)
{
	/* cpu masks */
	FREE_AND_ZERO(numa_possible_cpus_ptr);
	FREE_AND_ZERO(numa_all_cpus_ptr);

	/* node masks */
	FREE_AND_ZERO(numa_possible_nodes_ptr);
	FREE_AND_ZERO(numa_all_nodes_ptr);
	FREE_AND_ZERO(numa_no_nodes_ptr);
	FREE_AND_ZERO(numa_nodes_ptr);
	FREE_AND_ZERO(numa_memnode_ptr);

	cleanup_node_cpu_mask_v2();
}
/*
* The following bitmask declarations, bitmask_*() routines, and associated
* _setbit() and _getbit() routines are:
* Copyright (c) 2004_2007 Silicon Graphics, Inc. (SGI) All rights reserved.
* SGI publishes it under the terms of the GNU General Public License, v2,
* as published by the Free Software Foundation.
*/
/* Return bit n of the mask (0 or 1); bits at or beyond bmp->size read as 0. */
static unsigned int
_getbit(const struct bitmask *bmp, unsigned int n)
{
	if (n >= bmp->size)
		return 0;
	return (bmp->maskp[n / bitsperlong] >> (n % bitsperlong)) & 1;
}
/*
 * Set bit n of the mask when v is non-zero, clear it otherwise.
 * Requests for bits at or beyond bmp->size are silently ignored.
 */
static void
_setbit(struct bitmask *bmp, unsigned int n, unsigned int v)
{
	unsigned long *word;
	unsigned long bit;

	if (n >= bmp->size)
		return;

	word = &bmp->maskp[n / bitsperlong];
	bit = 1UL << (n % bitsperlong);
	if (v)
		*word |= bit;
	else
		*word &= ~bit;
}
/* Public bit query: non-zero when bit i is set, 0 when clear or out of range. */
int
numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i)
{
	unsigned int bit = _getbit(bmp, i);

	return bit;
}
/* Set every bit of the mask and return the mask itself. */
struct bitmask *
numa_bitmask_setall(struct bitmask *bmp)
{
	unsigned int bit = 0;

	while (bit < bmp->size) {
		_setbit(bmp, bit, 1);
		bit++;
	}
	return bmp;
}
/* Clear every bit of the mask and return the mask itself. */
struct bitmask *
numa_bitmask_clearall(struct bitmask *bmp)
{
	unsigned int bit = 0;

	while (bit < bmp->size) {
		_setbit(bmp, bit, 0);
		bit++;
	}
	return bmp;
}
/* Set bit i (ignored when out of range) and return the mask for chaining. */
struct bitmask *
numa_bitmask_setbit(struct bitmask *bmp, unsigned int i)
{
	const unsigned int on = 1;

	_setbit(bmp, i, on);
	return bmp;
}
/* Clear bit i (ignored when out of range) and return the mask for chaining. */
struct bitmask *
numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i)
{
	const unsigned int off = 0;

	_setbit(bmp, i, off);
	return bmp;
}
/* Size in bytes of the mask body: whole unsigned longs needed for bmp->size bits. */
unsigned int
numa_bitmask_nbytes(struct bitmask *bmp)
{
	size_t nlongs = longsperbits(bmp->size);

	return nlongs * sizeof(unsigned long);
}
/* where n is the number of bits in the map */
/* This function should not exit on failure, but right now we cannot really
recover from this. */
/*
 * Allocate a zeroed bitmask holding n bits.
 * Never returns NULL: an invalid size (n < 1) or an allocation failure is
 * reported through numa_error() and the process exits with status 1.
 */
struct bitmask *
numa_bitmask_alloc(unsigned int n)
{
	struct bitmask *bmp;

	if (n < 1) {
		errno = EINVAL;
		numa_error("request to allocate mask for invalid number");
		exit(1);
	}

	bmp = malloc(sizeof(*bmp));
	if (bmp != NULL) {
		bmp->size = n;
		bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long));
		if (bmp->maskp != NULL)
			return bmp;
		/* header allocated but body was not: undo before bailing out */
		free(bmp);
	}

	numa_error("Out of memory allocating bitmask");
	exit(1);
}
/* Release a bitmask and its body; a NULL pointer is accepted and ignored. */
void
numa_bitmask_free(struct bitmask *bmp)
{
	if (bmp != NULL) {
		free(bmp->maskp);
		bmp->maskp = (unsigned long *)0xdeadcdef;	/* double free tripwire */
		free(bmp);
	}
}
/* True if two bitmasks are equal */
/*
 * Compare two masks bit by bit over the larger of the two sizes; bits
 * missing from the shorter mask count as 0. Returns 1 if equal, else 0.
 */
int
numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2)
{
	unsigned int bit = 0;

	while (bit < bmp1->size || bit < bmp2->size) {
		if (_getbit(bmp1, bit) != _getbit(bmp2, bit))
			return 0;
		bit++;
	}
	return 1;
}
/* Hamming Weight: number of set bits */
/* Count the bits that are set in the mask. */
unsigned int numa_bitmask_weight(const struct bitmask *bmp)
{
	unsigned int count = 0;
	unsigned int bit;

	for (bit = 0; bit < bmp->size; bit++)
		count += _getbit(bmp, bit) ? 1 : 0;
	return count;
}
/* *****end of bitmask_ routines ************ */
/* Next two can be overwritten by the application for different error handling */
/*
 * Default error hook: print `where` plus the errno text via perror(), exit
 * with status 1 when numa_exit_on_error is set, otherwise return with errno
 * restored to its value on entry.
 */
WEAK void numa_error(char *where)
{
	const int saved_errno = errno;

	perror(where);
	if (numa_exit_on_error)
		exit(1);
	errno = saved_errno;
}
/*
 * Default warning hook: print "libnuma: Warning: " followed by the
 * printf-style message to stderr; errno is preserved.
 *
 * num identifies the warning. Each id that fits into the `warned` bit set
 * is reported only once; ids outside that range cannot be tracked and are
 * printed every time rather than triggering an undefined shift.
 */
WEAK void numa_warn(int num, char *fmt, ...)
{
	static unsigned warned;
	va_list ap;
	int olde = errno;

	/* Give each warning only once */
	if (num >= 0 && num < (int)(sizeof(warned) * CHAR_BIT)) {
		/* unsigned shift: 1 << 31 on a signed int would be undefined */
		unsigned bit = 1U << num;

		if (warned & bit)
			return;
		warned |= bit;
	}

	va_start(ap, fmt);
	fprintf(stderr, "libnuma: Warning: ");
	vfprintf(stderr, fmt, ap);
	fputc('\n', stderr);
	va_end(ap);

	errno = olde;
}
/* Apply `policy` with the nodes in bmp to the calling task; failures go to numa_error(). */
static void setpol(int policy, struct bitmask *bmp)
{
	long rc = set_mempolicy(policy, bmp->maskp, bmp->size + 1);

	if (rc < 0)
		numa_error("set_mempolicy");
}
/* Fetch the task's current policy into *oldpolicy and its node mask into bmp. */
static void getpol(int *oldpolicy, struct bitmask *bmp)
{
	long rc = get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0);

	if (rc < 0)
		numa_error("get_mempolicy");
}
/*
 * mbind() the range [mem, mem + size) with policy pol and the nodes in bmp,
 * using the per-thread mbind_flags. A NULL bmp passes no node mask at all.
 * Failures are reported through numa_error().
 */
static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp)
{
	unsigned long *nodes = NULL;
	unsigned long maxnode = 0;

	if (bmp) {
		nodes = bmp->maskp;
		maxnode = bmp->size + 1;
	}
	if (mbind(mem, size, pol, nodes, maxnode, mbind_flags) < 0)
		numa_error("mbind");
}
/* (undocumented) */
/* gives the wrong answer for hugetlbfs mappings. */
/* Return the page size from getpagesize(), cached after the first call. */
int numa_pagesize(void)
{
	static int pagesize;

	if (pagesize <= 0)
		pagesize = getpagesize();
	return pagesize;
}
make_internal_alias(numa_pagesize);
/*
* Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr)
* and the highest numbered existing node (maxconfigurednode).
*/
/*
 * Scan /sys/devices/system/node for "node<N>" entries and record them:
 * every node found is set in numa_nodes_ptr, nodes reporting a positive
 * memory size are also set in numa_memnode_ptr, and maxconfigurednode
 * tracks the largest N. Without the directory, maxconfigurednode is 0.
 */
static void
set_configured_nodes(void)
{
	DIR *dir;
	struct dirent *entry;
	long long freemem;

	numa_memnode_ptr = numa_allocate_nodemask();
	numa_nodes_ptr = numa_allocate_nodemask();

	dir = opendir("/sys/devices/system/node");
	if (dir == NULL) {
		maxconfigurednode = 0;
		return;
	}

	for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
		int nd;

		/* only "node<N>" entries are of interest */
		if (strncmp(entry->d_name, "node", 4) != 0)
			continue;
		nd = strtoul(entry->d_name + 4, NULL, 0);
		numa_bitmask_setbit(numa_nodes_ptr, nd);
		if (numa_node_size64(nd, &freemem) > 0)
			numa_bitmask_setbit(numa_memnode_ptr, nd);
		if (nd > maxconfigurednode)
			maxconfigurednode = nd;
	}
	closedir(dir);
}
/*
* Convert the string length of an ascii hex mask to the number
* of bits represented by that mask.
*/
/* Bits represented by an ascii hex mask of this length: 32 bits per 9 characters. */
static int s2nbits(const char *s)
{
	size_t nchars = strlen(s);

	return nchars * 32 / 9;
}
/* Is string 'pre' a prefix of string 's'? */
/* Return 1 when `s` begins with `pre` (an empty `pre` always matches), else 0. */
static int strprefix(const char *s, const char *pre)
{
	size_t prelen = strlen(pre);

	if (strncmp(s, pre, prelen) != 0)
		return 0;
	return 1;
}
/* File parsed for the task's allowed cpu and node masks. */
static const char *mask_size_file = "/proc/self/status";
/* Start of the line in mask_size_file that carries the allowed-nodes mask. */
static const char *nodemask_prefix = "Mems_allowed:\t";
/*
* (do this the way Paul Jackson's libcpuset does it)
* The nodemask values in /proc/self/status are in an
* ascii format that uses 9 characters for each 32 bits of mask.
* (this could also be used to find the cpumask size)
*/
/*
 * Determine nodemask_sz, the size in bits of the kernel's nodemask.
 *
 * Preferred source is the length of the "Mems_allowed:" line in
 * /proc/self/status. If that yields nothing, probe get_mempolicy() with
 * masks of doubling size (starting at 32 bits) until it stops failing
 * with EINVAL or the size reaches 4096*8 bits.
 */
static void
set_nodemask_size(void)
{
	FILE *fp;
	char *buf = NULL;
	size_t bufsize = 0;

	if ((fp = fopen(mask_size_file, "r")) == NULL)
		goto done;

	while (getline(&buf, &bufsize, fp) > 0) {
		if (strprefix(buf, nodemask_prefix)) {
			nodemask_sz = s2nbits(buf + strlen(nodemask_prefix));
			break;
		}
	}
	free(buf);
	fclose(fp);
done:
	if (nodemask_sz == 0) {/* fall back on error */
		int pol;
		unsigned long *mask = NULL;

		nodemask_sz = 16;
		do {
			unsigned long *bigger;

			nodemask_sz <<= 1;
			bigger = realloc(mask, nodemask_sz / 8);
			if (!bigger) {
				/* realloc left the old buffer alive: release it */
				free(mask);
				return;
			}
			mask = bigger;
		} while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL &&
				nodemask_sz < 4096*8);
		free(mask);
	}
}
/*
* Read a mask consisting of a sequence of hexadecimal longs separated by
* commas. Order them correctly and return the number of bits set.
*/
/*
 * Parse an ascii mask (comma separated hexadecimal 32-bit words, most
 * significant word first) into bmp.
 *
 * s   - text to parse; NULL yields 0
 * bmp - destination; its size bounds how many words are accepted
 *
 * Returns the number of bits set in bmp, or -1 when the text holds no
 * non-zero word or more words than bmp can take.
 */
static int
read_mask(char *s, struct bitmask *bmp)
{
	char *end = s;
	int tmplen = (bmp->size + bitsperint - 1) / bitsperint;
	unsigned int tmp[tmplen];
	unsigned int *start = tmp;
	unsigned int i, n = 0, m = 0;

	if (!s)
		return 0;	/* shouldn't happen */

	i = strtoul(s, &end, 16);

	/* Skip leading zeros */
	while (!i && *end++ == ',') {
		i = strtoul(end, &end, 16);
	}

	if (!i)
		/* End of string. No mask */
		return -1;

	start[n++] = i;
	/* Read sequence of ints */
	while (*end++ == ',') {
		/*
		 * Reject the word before storing it; checking after the
		 * store would already have written one element past tmp[].
		 */
		if (n >= (unsigned int)tmplen)
			return -1;
		i = strtoul(end, &end, 16);
		start[n++] = i;
	}

	/*
	 * Invert sequence of ints if necessary since the first int
	 * is the highest and we put it first because we read it first.
	 */
	while (n) {
		int w;
		unsigned long x = 0;

		/* read into long values in an endian-safe way */
		for (w = 0; n && w < bitsperlong; w += bitsperint)
			x |= ((unsigned long)start[n-- - 1] << w);

		bmp->maskp[m++] = x;
	}
	/*
	 * Return the number of bits set
	 */
	return numa_bitmask_weight(bmp);
}
/*
* Read a process's constraints in terms of nodes and cpus from
* /proc/self/status.
*/
/*
 * Allocate the global cpu/node masks and fill them for the current task.
 *
 * numa_all_cpus_ptr / numa_all_nodes_ptr come from the "Cpus_allowed:" and
 * "Mems_allowed:" lines of /proc/self/status, with numproccpu / numprocnode
 * holding the number of bits set. The "possible" masks get every id up to
 * the highest configured cpu / node. When a line is missing or unparsable
 * the corresponding "all" mask falls back to every configured id.
 * If the status file cannot be opened, only the allocations take place.
 */
static void
set_task_constraints(void)
{
	int hicpu = maxconfiguredcpu;
	int i;
	char *buffer = NULL;
	size_t buflen = 0;
	FILE *f;

	numa_all_cpus_ptr = numa_allocate_cpumask();
	numa_possible_cpus_ptr = numa_allocate_cpumask();
	numa_all_nodes_ptr = numa_allocate_nodemask();
	/* a node mask: size it like every other node mask, not like a cpu mask */
	numa_possible_nodes_ptr = numa_allocate_nodemask();
	numa_no_nodes_ptr = numa_allocate_nodemask();

	f = fopen(mask_size_file, "r");
	if (!f) {
		//numa_warn(W_cpumap, "Cannot parse %s", mask_size_file);
		return;
	}

	while (getline(&buffer, &buflen, f) > 0) {
		/* mask starts after [last] tab */
		char *tab = strrchr(buffer, '\t');
		char *mask;

		/* no tab, no mask: skip instead of doing arithmetic on NULL */
		if (!tab)
			continue;
		mask = tab + 1;

		if (strncmp(buffer,"Cpus_allowed:",13) == 0)
			numproccpu = read_mask(mask, numa_all_cpus_ptr);

		if (strncmp(buffer,"Mems_allowed:",13) == 0) {
			numprocnode = read_mask(mask, numa_all_nodes_ptr);
		}
	}
	fclose(f);
	free(buffer);

	for (i = 0; i <= hicpu; i++)
		numa_bitmask_setbit(numa_possible_cpus_ptr, i);
	for (i = 0; i <= maxconfigurednode; i++)
		numa_bitmask_setbit(numa_possible_nodes_ptr, i);

	/*
	 * Cpus_allowed in the kernel can be defined to all f's
	 * i.e. it may be a superset of the actual available processors.
	 * As such let's reduce numproccpu to the number of actual
	 * available cpus.
	 */
	if (numproccpu <= 0) {
		for (i = 0; i <= hicpu; i++)
			numa_bitmask_setbit(numa_all_cpus_ptr, i);
		numproccpu = hicpu+1;
	}

	if (numproccpu > hicpu+1) {
		numproccpu = hicpu+1;
		for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) {
			numa_bitmask_clearbit(numa_all_cpus_ptr, i);
		}
	}

	if (numprocnode <= 0) {
		for (i = 0; i <= maxconfigurednode; i++)
			numa_bitmask_setbit(numa_all_nodes_ptr, i);
		numprocnode = maxconfigurednode + 1;
	}

	return;
}
/*
* Find the highest cpu number possible (in other words the size
* of a kernel cpumask_t (in bits) - 1)
*/
/*
 * Determine cpumask_sz, the size in bits of the kernel's cpumask.
 *
 * The affinity query is retried with a buffer of doubling size (starting
 * at 4096 bits) while it fails with EINVAL. On any other failure, or once
 * the buffer has reached 1M bits, a warning is issued and
 * sizeof(cpu_set_t) is used as a guess, so cpumask_sz can never end up
 * negative. errno is preserved.
 */
static void
set_numa_max_cpu(void)
{
	int len = 4096;
	int n;
	int olde = errno;
	struct bitmask *buffer;

	do {
		buffer = numa_bitmask_alloc(len);
		n = numa_sched_getaffinity_v2_int(0, buffer);
		/* on success, returns size of kernel cpumask_t, in bytes */
		if (n < 0) {
			if (errno == EINVAL && len < 1024*1024) {
				/* buffer too small for the kernel mask: retry bigger */
				len *= 2;
				numa_bitmask_free(buffer);
				continue;
			}
			numa_warn(W_numcpus, "Unable to determine max cpu"
				  " (sched_getaffinity: %s); guessing...",
				  strerror(errno));
			n = sizeof(cpu_set_t);
			break;
		}
	} while (n < 0);
	numa_bitmask_free(buffer);
	errno = olde;
	cpumask_sz = n*8;
}
/*
* get the total (configured) number of cpus - both online and offline
*/
/*
 * Record the highest configured cpu id (online or offline) in
 * maxconfiguredcpu, taken from sysconf(_SC_NPROCESSORS_CONF).
 * When sysconf fails (-1) or reports no cpus, the error is passed to
 * numa_error() and maxconfiguredcpu is left at -1.
 */
static void
set_configured_cpus(void)
{
	long ncpus = sysconf(_SC_NPROCESSORS_CONF);

	if (ncpus < 1) {
		maxconfiguredcpu = -1;
		numa_error("sysconf(NPROCESSORS_CONF) failed");
		return;
	}
	maxconfiguredcpu = ncpus - 1;
}
/*
* Initialize all the sizes.
*/
/*
 * Run every probe that fills the file-level size and topology state.
 * The order matters: the mask allocators used by the later steps depend
 * on the mask sizes determined by the earlier ones.
 */
static void
set_sizes(void)
{
sizes_set++;	/* lets numa_init() skip a second initialization */
set_nodemask_size(); /* size of kernel nodemask_t */
set_configured_nodes(); /* configured nodes listed in /sys */
set_numa_max_cpu(); /* size of kernel cpumask_t */
set_configured_cpus(); /* cpus listed in /sys/devices/system/cpu */
set_task_constraints(); /* cpus and nodes for current task */
}
int
numa_num_configured_nodes(void)
{
	/*
	 * NOTE: despite its name this returns the number of nodes that have
	 * memory, which is what the documentation promises.
	 * numa_all_nodes_ptr cannot be used for this: it only covers the
	 * memory nodes the calling process may allocate from (think sparse
	 * nodes, memory-less nodes, cpusets...).
	 */
	int node = 0;
	int with_memory = 0;

	while (node <= maxconfigurednode) {
		if (numa_bitmask_isbitset(numa_memnode_ptr, node))
			with_memory++;
		node++;
	}
	return with_memory;
}
/* Number of configured cpus: one more than the highest configured cpu id. */
int
numa_num_configured_cpus(void)
{
	const int count = maxconfiguredcpu + 1;

	return count;
}
/* Size of the kernel node mask in bits, as determined by set_nodemask_size(). */
int
numa_num_possible_nodes(void)
{
	const int bits = nodemask_sz;

	return bits;
}
/* Size of the kernel cpu mask in bits, as determined by set_numa_max_cpu(). */
int
numa_num_possible_cpus(void)
{
	const int bits = cpumask_sz;

	return bits;
}
/* Number of nodes the task may use, as computed by set_task_constraints(). */
int
numa_num_task_nodes(void)
{
	const int count = numprocnode;

	return count;
}
/*
* for backward compatibility
*/
/* Older name kept for existing callers; forwards to numa_num_task_nodes(). */
int
numa_num_thread_nodes(void)
{
	int count = numa_num_task_nodes();

	return count;
}
/* Number of cpus the task may use, as computed by set_task_constraints(). */
int
numa_num_task_cpus(void)
{
	const int count = numproccpu;

	return count;
}
/*
* for backward compatibility
*/
/* Older name kept for existing callers; forwards to numa_num_task_cpus(). */
int
numa_num_thread_cpus(void)
{
	int count = numa_num_task_cpus();

	return count;
}
/*
* Return the number of the highest node in this running system,
*/
/* Highest node id found by set_configured_nodes(). */
int
numa_max_node(void)
{
	const int highest = maxconfigurednode;

	return highest;
}
make_internal_alias(numa_max_node);
/*
* Return the number of the highest possible node in a system,
* which for v1 is the size of a numa.h nodemask_t(in bits)-1.
* but for v2 is the size of a kernel nodemask_t(in bits)-1.
*/
SYMVER("numa_max_possible_node_v1", "numa_max_possible_node@libnuma_1.1")
int
numa_max_possible_node_v1(void)
{
	/* v1: bounded by the number of bits in a numa.h nodemask_t */
	const int bits = sizeof(nodemask_t) * 8;

	return bits - 1;
}
SYMVER("numa_max_possible_node_v2", "numa_max_possible_node@@libnuma_1.2")
int
numa_max_possible_node_v2(void)
{
	/* v2: bounded by the size of the kernel node mask */
	int bits = numa_num_possible_nodes();

	return bits - 1;
}
make_internal_alias(numa_max_possible_node_v1);
make_internal_alias(numa_max_possible_node_v2);
/*
* Allocate a bitmask for cpus, of a size large enough to
* match the kernel's cpumask_t.
*/
/* Allocate an empty cpu mask with numa_num_possible_cpus() bits. */
struct bitmask *
numa_allocate_cpumask()
{
	struct bitmask *cpus = numa_bitmask_alloc(numa_num_possible_cpus());

	return cpus;
}
/*
* Allocate a bitmask the size of a libnuma nodemask_t
*/
/* Allocate an empty mask with as many bits as a v1 nodemask_t. */
static struct bitmask *
allocate_nodemask_v1(void)
{
	int highest = numa_max_possible_node_v1_int();

	return numa_bitmask_alloc(highest + 1);
}
/*
* Allocate a bitmask for nodes, of a size large enough to
* match the kernel's nodemask_t.
*/
/* Allocate an empty node mask with numa_max_possible_node (v2) + 1 bits. */
struct bitmask *
numa_allocate_nodemask(void)
{
	int highest = numa_max_possible_node_v2_int();

	return numa_bitmask_alloc(highest + 1);
}
/* (cache the result?) */
/*
 * Report the memory size of a node from
 * /sys/devices/system/node/node<N>/meminfo.
 *
 * node  - node id
 * freep - optional; receives the MemFree value in bytes, or -1 if unknown
 *
 * Returns the MemTotal value in bytes, or -1 when the file cannot be
 * opened or holds no parsable MemTotal line. A W_badmeminfo warning is
 * issued when not all requested values were found.
 */
long long numa_node_size64(int node, long long *freep)
{
	size_t len = 0;
	char *line = NULL;
	long long size = -1;
	FILE *f;
	char fn[64];
	int ok = 0;
	int required = freep ? 2 : 1;

	if (freep)
		*freep = -1;
	snprintf(fn, sizeof(fn), "/sys/devices/system/node/node%d/meminfo", node);
	f = fopen(fn, "r");
	if (!f)
		return -1;
	while (getdelim(&line, &len, '\n', f) > 0) {
		char *end;
		char *s = strcasestr(line, "kB");

		/* no unit, or nothing in front of it that could be a number */
		if (!s || s == line)
			continue;
		/* walk back from the unit over blanks, then over the digits */
		--s;
		while (s > line && isspace((unsigned char)*s))
			--s;
		while (s > line && isdigit((unsigned char)*s))
			--s;
		if (strstr(line, "MemTotal")) {
			size = strtoull(s,&end,0) << 10;
			if (end == s)
				size = -1;
			else
				ok++;
		}
		if (freep && strstr(line, "MemFree")) {
			*freep = strtoull(s,&end,0) << 10;
			if (end == s)
				*freep = -1;
			else
				ok++;
		}
	}
	fclose(f);
	free(line);
	if (ok != required)
		numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok);
	return size;
}
make_internal_alias(numa_node_size64);
/*
 * Variant of numa_node_size64(): returns the node's total memory and, when
 * freep is non-NULL, stores the free memory there.
 */
long long numa_node_size(int node, long long *freep)
{
	long long nodefree;
	long long total = numa_node_size64_int(node, &nodefree);

	if (freep != NULL)
		*freep = nodefree;
	return total;
}
/* Return -1 when the get_mempolicy() system call is not implemented (ENOSYS), else 0. */
int numa_available(void)
{
	long rc = get_mempolicy(NULL, NULL, 0, 0, 0);

	return (rc < 0 && errno == ENOSYS) ? -1 : 0;
}
SYMVER("numa_interleave_memory_v1", "numa_interleave_memory@libnuma_1.1")
void
numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask)
{
	/* wrap the caller's v1 mask in a bitmask header; nothing is copied */
	struct bitmask wrapper;

	wrapper.maskp = (unsigned long *)mask;
	wrapper.size = sizeof(nodemask_t) * 8;
	dombind(mem, size, MPOL_INTERLEAVE, &wrapper);
}
SYMVER("numa_interleave_memory_v2", "numa_interleave_memory@@libnuma_1.2")
void
numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp)
{
	/* interleave the range over the nodes set in bmp */
	const int policy = MPOL_INTERLEAVE;

	dombind(mem, size, policy, bmp);
}
/* Apply the current bind policy to [mem, mem + size) with `node` as the only node. */
void numa_tonode_memory(void *mem, size_t size, int node)
{
	struct bitmask *target = numa_allocate_nodemask();

	numa_bitmask_setbit(target, node);
	dombind(mem, size, bind_policy, target);
	numa_bitmask_free(target);
}
SYMVER("numa_tonodemask_memory_v1", "numa_tonodemask_memory@libnuma_1.1")
/*
 * v1 entry point: apply the current bind policy to [mem, mem + size) for
 * the nodes set in a numa.h nodemask_t.
 */
void
numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask)
{
	struct bitmask bitmask;

	bitmask.maskp = (unsigned long *)mask;
	/*
	 * bitmask.size counts bits, not bytes (as in
	 * numa_interleave_memory_v1); with the byte count only the first
	 * sizeof(nodemask_t) nodes of the mask would reach the kernel.
	 */
	bitmask.size = sizeof(nodemask_t) * 8;
	dombind(mem, size, bind_policy, &bitmask);
}
SYMVER("numa_tonodemask_memory_v2", "numa_tonodemask_memory@@libnuma_1.2")
void
numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp)
{
	/* use whichever policy numa_set_bind_policy() selected for this thread */
	const int policy = bind_policy;

	dombind(mem, size, policy, bmp);
}
/* Apply MPOL_PREFERRED without a node mask to [mem, mem + size). */
void numa_setlocal_memory(void *mem, size_t size)
{
	struct bitmask *no_mask = NULL;

	dombind(mem, size, MPOL_PREFERRED, no_mask);
}
/*
 * Touch the first byte of every page in [mem, mem + size) with an atomic
 * AND against 0xff, which writes to the page without changing its content.
 */
void numa_police_memory(void *mem, size_t size)
{
	const int step = numa_pagesize_int();
	unsigned long offset = 0;
	char *page = mem;

	while (offset < size) {
		__atomic_and_fetch(page, 0xff, __ATOMIC_RELAXED);
		offset += step;
		page += step;
	}
}
make_internal_alias(numa_police_memory);
/*
 * Map `size` bytes of private anonymous memory and touch every page via
 * numa_police_memory. Returns NULL when the mapping fails.
 */
void *numa_alloc(size_t size)
{
	void *mem = mmap(0, size, PROT_READ|PROT_WRITE,
			 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);

	if (mem == MAP_FAILED)
		return NULL;
	numa_police_memory_int(mem, size);
	return mem;
}
/*
 * Resize a mapping with mremap(), allowing it to move.
 * Returns the new address, or NULL when mremap() fails.
 */
void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
{
	void *moved = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);

	/*
	 * No policy is (re)set on success: mremap() keeps the memory policy of
	 * the pages already allocated. If the original allocation had no policy,
	 * new pages follow the process' mempolicy. Forcing the new pages onto the
	 * node of the old ones would mean changing their policy, which violates
	 * the numa_realloc() semantics.
	 */
	return (moved == MAP_FAILED) ? NULL : moved;
}
SYMVER("numa_alloc_interleaved_subset_v1", "numa_alloc_interleaved_subset@libnuma_1.1")
/*
 * v1 entry point: map `size` bytes of anonymous memory interleaved over
 * the nodes set in a numa.h nodemask_t. Returns NULL when mmap() fails.
 */
void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
{
	char *mem;
	struct bitmask bitmask;

	mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
			0, 0);
	if (mem == (char *)-1)
		return NULL;
	bitmask.maskp = (unsigned long *)mask;
	/*
	 * bitmask.size counts bits, not bytes (as in
	 * numa_interleave_memory_v1); with the byte count only the first
	 * sizeof(nodemask_t) nodes of the mask would reach the kernel.
	 */
	bitmask.size = sizeof(nodemask_t) * 8;
	dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
	return mem;
}
SYMVER("numa_alloc_interleaved_subset_v2", "numa_alloc_interleaved_subset@@libnuma_1.2")
void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp)
{
	/* fresh anonymous mapping, interleaved over the nodes in bmp */
	void *mem = mmap(0, size, PROT_READ|PROT_WRITE,
			 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);

	if (mem == MAP_FAILED)
		return NULL;
	dombind(mem, size, MPOL_INTERLEAVE, bmp);
	return mem;
}
make_internal_alias(numa_alloc_interleaved_subset_v1);
make_internal_alias(numa_alloc_interleaved_subset_v2);
/* Allocate `size` bytes interleaved over the nodes in numa_all_nodes_ptr. */
void *
numa_alloc_interleaved(size_t size)
{
	struct bitmask *nodes = numa_all_nodes_ptr;

	return numa_alloc_interleaved_subset_v2_int(size, nodes);
}
/*
* given a user node mask, set memory policy to use those nodes
*/
SYMVER("numa_set_interleave_mask_v1", "numa_set_interleave_mask@libnuma_1.1")
void
numa_set_interleave_mask_v1(nodemask_t *mask)
{
	struct bitmask *bmp;
	int policy;

	/* convert the v1 mask into a bitmask of matching width */
	bmp = numa_bitmask_alloc(numa_max_possible_node_v1_int() + 1);
	copy_nodemask_to_bitmask(mask, bmp);

	/* an empty mask selects the default policy instead of interleaving */
	policy = numa_bitmask_equal(bmp, numa_no_nodes_ptr)
		? MPOL_DEFAULT : MPOL_INTERLEAVE;
	setpol(policy, bmp);
	numa_bitmask_free(bmp);
}
SYMVER("numa_set_interleave_mask_v2", "numa_set_interleave_mask@@libnuma_1.2")
void
numa_set_interleave_mask_v2(struct bitmask *bmp)
{
	/* an empty mask selects the default policy instead of interleaving */
	int policy = numa_bitmask_equal(bmp, numa_no_nodes_ptr)
		? MPOL_DEFAULT : MPOL_INTERLEAVE;

	setpol(policy, bmp);
}
SYMVER("numa_get_interleave_mask_v1", "numa_get_interleave_mask@libnuma_1.1")
nodemask_t
numa_get_interleave_mask_v1(void)
{
	int policy;
	nodemask_t result;
	struct bitmask *queried = allocate_nodemask_v1();

	getpol(&policy, queried);
	/* report the queried nodes only for an interleave policy, else no nodes */
	copy_bitmask_to_nodemask(policy == MPOL_INTERLEAVE ? queried
							   : numa_no_nodes_ptr,
				 &result);
	numa_bitmask_free(queried);
	return result;
}
SYMVER("numa_get_interleave_mask_v2", "numa_get_interleave_mask@@libnuma_1.2")
struct bitmask *
numa_get_interleave_mask_v2(void)
{
	int policy;
	struct bitmask *result = numa_allocate_nodemask();

	getpol(&policy, result);
	if (policy == MPOL_INTERLEAVE)
		return result;

	/* not interleaving: hand back a copy of the empty node mask */
	copy_bitmask_to_bitmask(numa_no_nodes_ptr, result);
	return result;
}
/* (undocumented) */
/* Return the node reported by get_mempolicy(MPOL_F_NODE), or 0 when the call fails. */
int numa_get_interleave_node(void)
{
	int nd;

	if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) != 0)
		return 0;
	return nd;
}
/*
 * Map `size` bytes of anonymous memory and apply the current bind policy
 * for `node` to it. Returns NULL when mmap() fails.
 */
void *numa_alloc_onnode(size_t size, int node)
{
	void *mem;
	struct bitmask *target = numa_allocate_nodemask();

	numa_bitmask_setbit(target, node);
	mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
		   0, 0);
	if (mem != MAP_FAILED)
		dombind(mem, size, bind_policy, target);
	else
		mem = NULL;
	numa_bitmask_free(target);
	return mem;
}
/*
 * Map `size` bytes of anonymous memory and apply MPOL_PREFERRED without a
 * node mask to it. Returns NULL when mmap() fails.
 */
void *numa_alloc_local(size_t size)
{
	void *mem = mmap(0, size, PROT_READ|PROT_WRITE,
			 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);

	if (mem == MAP_FAILED)
		return NULL;
	dombind(mem, size, MPOL_PREFERRED, NULL);
	return mem;
}
/* Select the per-thread bind policy: MPOL_BIND when strict, MPOL_PREFERRED otherwise. */
void numa_set_bind_policy(int strict)
{
	bind_policy = strict ? MPOL_BIND : MPOL_PREFERRED;
}
SYMVER("numa_set_membind_v1", "numa_set_membind@libnuma_1.1")
/*
 * v1 entry point: set MPOL_BIND for the calling task to the nodes set in a
 * numa.h nodemask_t.
 */
void
numa_set_membind_v1(const nodemask_t *mask)
{
	struct bitmask bitmask;

	bitmask.maskp = (unsigned long *)mask;
	/*
	 * bitmask.size counts bits, not bytes (as in
	 * numa_interleave_memory_v1); with the byte count only the first
	 * sizeof(nodemask_t) nodes of the mask would reach the kernel.
	 */
	bitmask.size = sizeof(nodemask_t) * 8;
	setpol(MPOL_BIND, &bitmask);
}
SYMVER("numa_set_membind_v2", "numa_set_membind@@libnuma_1.2")
void
numa_set_membind_v2(struct bitmask *bmp)
{
	/* set MPOL_BIND with the nodes set in bmp */
	const int policy = MPOL_BIND;

	setpol(policy, bmp);
}
make_internal_alias(numa_set_membind_v2);
/*
 * Set MPOL_BIND with the MPOL_F_NUMA_BALANCING flag. If the request is
 * rejected with EINVAL, fall back to a plain MPOL_BIND with errno cleared;
 * any other failure goes to numa_error().
 */
void
numa_set_membind_balancing(struct bitmask *bmp)
{
	/* MPOL_F_NUMA_BALANCING: ignore if unsupported */
	if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING,
			  bmp->maskp, bmp->size + 1) >= 0)
		return;

	if (errno != EINVAL) {
		numa_error("set_mempolicy");
		return;
	}
	errno = 0;
	numa_set_membind_v2(bmp);
}
/*
* copy a bitmask map body to a numa.h nodemask_t structure
*/
/*
 * Clear *nmp, then set in it every bit that is set in bmp, stopping at the
 * smaller of bmp->size and the number of bits in a nodemask_t.
 */
void
copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp)
{
	int limit = sizeof(nodemask_t) * 8;
	int bit;

	memset(nmp, 0, sizeof(nodemask_t));
	for (bit = 0; bit < bmp->size && bit < limit; bit++) {
		if (numa_bitmask_isbitset(bmp, bit))
			nodemask_set_compat((nodemask_t *)nmp, bit);
	}
}
/*
* copy a bitmask map body to another bitmask body
* fill a larger destination with zeroes
*/
/*
 * Copy the body of bmpfrom into bmpto. A source at least as large as the
 * destination is truncated to the destination's byte length; a smaller
 * source is copied whole and the rest of the destination is zeroed.
 */
void
copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto)
{
	int srcbytes;

	if (bmpfrom->size >= bmpto->size) {
		memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size));
		return;
	}

	srcbytes = CPU_BYTES(bmpfrom->size);
	memcpy(bmpto->maskp, bmpfrom->maskp, srcbytes);
	memset(((char *)bmpto->maskp) + srcbytes, 0,
	       CPU_BYTES(bmpto->size) - srcbytes);
}
/*
* copy a numa.h nodemask_t structure to a bitmask map body
*/
/*
 * Clear bmp, then set in it every bit that is set in *nmp, limited to the
 * smaller of the nodemask_t width and bmp->size.
 */
void
copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp)
{
	int limit = sizeof(nodemask_t) * 8;
	int bit = 0;

	numa_bitmask_clearall(bmp);
	if (limit > bmp->size)
		limit = bmp->size;
	while (bit < limit) {
		if (nodemask_isset_compat(nmp, bit))
			numa_bitmask_setbit(bmp, bit);
		bit++;
	}
}
SYMVER("numa_get_membind_v1", "numa_get_membind@libnuma_1.1")
nodemask_t
numa_get_membind_v1(void)
{
	int policy;
	nodemask_t result;
	struct bitmask *queried = allocate_nodemask_v1();

	getpol(&policy, queried);
	if (policy != MPOL_BIND) {
		/* copy the body of the map to numa_all_nodes */
		copy_bitmask_to_nodemask(queried, &numa_all_nodes);
		result = numa_all_nodes;
	} else {
		copy_bitmask_to_nodemask(queried, &result);
	}
	numa_bitmask_free(queried);
	return result;
}
SYMVER("numa_get_membind_v2", "numa_get_membind@@libnuma_1.2")
struct bitmask *
numa_get_membind_v2(void)
{
	int policy;
	struct bitmask *result = numa_allocate_nodemask();

	getpol(&policy, result);
	if (policy == MPOL_BIND)
		return result;

	/* no bind policy in effect: report numa_all_nodes_ptr instead */
	copy_bitmask_to_bitmask(numa_all_nodes_ptr, result);
	return result;
}
//TODO: do we need a v1 nodemask_t version?
/*
 * Return a newly allocated mask filled by get_mempolicy(MPOL_F_MEMS_ALLOWED).
 * A failing query is reported through numa_error(); the mask is returned
 * either way.
 */
struct bitmask *numa_get_mems_allowed(void)
{
	/* can change, so query on each call. */
	struct bitmask *allowed = numa_allocate_nodemask();
	long rc = get_mempolicy(NULL, allowed->maskp, allowed->size + 1, 0,
				MPOL_F_MEMS_ALLOWED);

	if (rc < 0)
		numa_error("get_mempolicy");
	return allowed;
}
make_internal_alias(numa_get_mems_allowed);
/* Unmap [mem, mem + size); the result of munmap() is not reported. */
void numa_free(void *mem, size_t size)
{
	(void)munmap(mem, size);
}
SYMVER("numa_parse_bitmap_v1", "numa_parse_bitmap@libnuma_1.1")
/*
 * Parse a newline-terminated line of comma separated hexadecimal fields
 * into an array of unsigned longs, last field first (mask[0] receives the
 * last field of the line).
 *
 * line  - text to parse; it is modified in place (commas are removed when
 *         two fields are merged into one long)
 * mask  - destination array
 * ncpus - number of cpus the array can describe; bounds the words written
 *
 * Returns 0 on success, -1 if the line has no newline, holds more words
 * than CPU_LONGS(ncpus), or a field is not a clean hex number.
 */
int
numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus)
{
int i;
char *p = strchr(line, '\n');
if (!p)
return -1;
/* walk backwards from the newline, one output word per iteration */
for (i = 0; p > line;i++) {
char *oldp, *endp;
/* oldp marks where the current field has to end */
oldp = p;
if (*p == ',')
--p;
/* move p back to the comma in front of the field, or to the line start */
while (p > line && *p != ',')
--p;
/* Eat two 32bit fields at a time to get longs */
if (p > line && sizeof(unsigned long) == 8) {
/* the text shifts left by one once the comma is removed */
oldp--;
memmove(p, p+1, oldp-p+1);
while (p > line && *p != ',')
--p;
}
if (*p == ',')
p++;
if (i >= CPU_LONGS(ncpus))
return -1;
mask[i] = strtoul(p, &endp, 16);
/* the number must end exactly where the field ends */
if (endp != oldp)
return -1;
p--;
}
return 0;
}
SYMVER("numa_parse_bitmap_v2", "numa_parse_bitmap@@libnuma_1.2")
/*
 * Parse a newline-terminated line of comma separated hexadecimal fields
 * into the body of a bitmask, last field first (maskp[0] receives the last
 * field of the line).
 *
 * line - text to parse; it is modified in place (commas are removed when
 *        two fields are merged into one long)
 * mask - destination; mask->size bounds the number of words written
 *
 * Returns 0 on success, -1 if the line has no newline, holds more words
 * than CPU_LONGS(mask->size), or a field is not a clean hex number.
 */
int
numa_parse_bitmap_v2(char *line, struct bitmask *mask)
{
int i, ncpus;
char *p = strchr(line, '\n');
if (!p)
return -1;
ncpus = mask->size;
/* walk backwards from the newline, one output word per iteration */
for (i = 0; p > line;i++) {
char *oldp, *endp;
/* oldp marks where the current field has to end */
oldp = p;
if (*p == ',')
--p;
/* move p back to the comma in front of the field, or to the line start */
while (p > line && *p != ',')
--p;
/* Eat two 32bit fields at a time to get longs */
if (p > line && sizeof(unsigned long) == 8) {
/* the text shifts left by one once the comma is removed */
oldp--;
memmove(p, p+1, oldp-p+1);
while (p > line && *p != ',')
--p;
}
if (*p == ',')
p++;
if (i >= CPU_LONGS(ncpus))
return -1;
mask->maskp[i] = strtoul(p, &endp, 16);
/* the number must end exactly where the field ends */
if (endp != oldp)
return -1;
p--;
}
return 0;
}
/* Allocate the zeroed v2 cache: one bitmask pointer per possible node. */
static void init_node_cpu_mask_v2(void)
{
	int slots = numa_max_possible_node_v2_int() + 1;

	node_cpu_mask_v2 = calloc(slots, sizeof(struct bitmask *));
}
/* Free every cached per-node cpu mask and the v2 cache array itself. */
static void cleanup_node_cpu_mask_v2(void)
{
	int slot, slots;

	if (!node_cpu_mask_v2)
		return;

	slots = numa_max_possible_node_v2_int() + 1;
	for (slot = 0; slot < slots; slot++) {
		FREE_AND_ZERO(node_cpu_mask_v2[slot]);
	}
	free(node_cpu_mask_v2);
	node_cpu_mask_v2 = NULL;
}
/* This would be better with some locking, but I don't want to make libnuma
dependent on pthreads right now. The races are relatively harmless. */
SYMVER("numa_node_to_cpus_v1", "numa_node_to_cpus@libnuma_1.1")
/*
 * v1 entry point: copy the cpu mask of `node` into `buffer`.
 *
 * node      - node id; must not exceed numa_max_node() and must fit the
 *             v1 cache, which has NUMA_NUM_NODES slots
 * buffer    - destination array of unsigned longs
 * bufferlen - size of buffer in bytes; must be at least
 *             CPU_BYTES(numa_num_possible_cpus())
 *
 * Returns 0 on success. Returns -1 with errno = ERANGE for a bad node or a
 * short buffer. Returns -1 with every cpu set in buffer when the node's
 * cpumap in sysfs cannot be read or parsed.
 *
 * Results are cached per node; numa_node_to_cpu_update() marks the cache
 * stale so the next call re-reads sysfs.
 */
int
numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
{
	int err = 0;
	char fn[64];
	FILE *f;
	char update;
	char *line = NULL;
	size_t len = 0;
	struct bitmask bitmask;
	int buflen_needed;
	unsigned long *mask;
	int ncpus = numa_num_possible_cpus();
	int maxnode = numa_max_node_int();

	buflen_needed = CPU_BYTES(ncpus);
	/* node also indexes node_cpu_mask_v1[], which has NUMA_NUM_NODES slots */
	if ((unsigned)node > maxnode || node >= NUMA_NUM_NODES ||
	    bufferlen < buflen_needed) {
		errno = ERANGE;
		return -1;
	}
	if (bufferlen > buflen_needed)
		memset(buffer, 0, bufferlen);
	update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED);
	if (node_cpu_mask_v1[node] && !update) {
		memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
		return 0;
	}

	mask = malloc(buflen_needed);
	if (!mask)
		/* out of memory: work directly in the caller's buffer */
		mask = (unsigned long *)buffer;
	memset(mask, 0, buflen_needed);

	snprintf(fn, sizeof(fn), "/sys/devices/system/node/node%d/cpumap", node);
	f = fopen(fn, "r");
	if (!f || getdelim(&line, &len, '\n', f) < 1) {
		if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
			numa_warn(W_nosysfs2,
			   "/sys not mounted or invalid. Assuming one node: %s",
				  strerror(errno));
			numa_warn(W_nosysfs2,
			   "(cannot open or correctly parse %s)", fn);
		}
		bitmask.maskp = (unsigned long *)mask;
		bitmask.size  = buflen_needed * 8;
		numa_bitmask_setall(&bitmask);
		err = -1;
	}
	if (f)
		fclose(f);

	if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) {
		numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
		bitmask.maskp = (unsigned long *)mask;
		bitmask.size  = buflen_needed * 8;
		numa_bitmask_setall(&bitmask);
		err = -1;
	}

	free(line);
	/* when working in the caller's buffer the result is already in place */
	if (mask != buffer)
		memcpy(buffer, mask, buflen_needed);

	/* slightly racy, see above */
	if (node_cpu_mask_v1[node]) {
		if (update) {
			/*
			 * There may be readers on node_cpu_mask_v1[], hence it can not
			 * be freed.
			 */
			memcpy(node_cpu_mask_v1[node], mask, buflen_needed);
		}
		/* never free the caller's buffer */
		if (mask != buffer)
			free(mask);
	} else if (mask != buffer) {
		/* only cache memory we own, never the caller's buffer */
		node_cpu_mask_v1[node] = mask;
	}
	return err;
}
/*
* test whether a node has cpus
*/
/* This would be better with some locking, but I don't want to make libnuma
dependent on pthreads right now. The races are relatively harmless. */
/*
* deliver a bitmask of cpus representing the cpus on a given node
*/
SYMVER("numa_node_to_cpus_v2", "numa_node_to_cpus@@libnuma_1.2")
/*
 * v2 entry point: store the cpu mask of `node` in `buffer`.
 *
 * node   - node id in the range 0..numa_max_node()
 * buffer - destination bitmask; cleared first
 *
 * Returns 0 on success. Returns -1 with errno = ERANGE for a node outside
 * the valid range, with errno = ENOMEM when the cache cannot be allocated,
 * and with errno = EINVAL when buffer is smaller than the cached mask.
 * Returns -1 with every cpu set in buffer when the node's cpumap in sysfs
 * cannot be read or parsed.
 *
 * Good results are cached per node; numa_node_to_cpu_update() marks the
 * cache stale so the next call re-reads sysfs.
 */
int
numa_node_to_cpus_v2(int node, struct bitmask *buffer)
{
	int err = 0;
	int nnodes = numa_max_node();
	char fn[64], *line = NULL;
	FILE *f;
	char update;
	size_t len = 0;
	struct bitmask *mask;

	if (!node_cpu_mask_v2)
		init_node_cpu_mask_v2();
	if (!node_cpu_mask_v2) {
		/* the cache array could not be allocated */
		errno = ENOMEM;
		return -1;
	}

	/* node indexes the cache below, so negative ids must be rejected too */
	if (node < 0 || node > nnodes) {
		errno = ERANGE;
		return -1;
	}
	numa_bitmask_clearall(buffer);

	update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED);
	if (node_cpu_mask_v2[node] && !update) {
		/* have already constructed a mask for this node */
		if (buffer->size < node_cpu_mask_v2[node]->size) {
			errno = EINVAL;
			numa_error("map size mismatch");
			return -1;
		}
		copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer);
		return 0;
	}

	/* need a new mask for this node */
	mask = numa_allocate_cpumask();

	/* this is a kernel cpumask_t (see node_read_cpumap()) */
	snprintf(fn, sizeof(fn), "/sys/devices/system/node/node%d/cpumap", node);
	f = fopen(fn, "r");
	if (!f || getdelim(&line, &len, '\n', f) < 1) {
		if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
			numa_warn(W_nosysfs2,
			   "/sys not mounted or invalid. Assuming one node: %s",
				  strerror(errno));
			numa_warn(W_nosysfs2,
			   "(cannot open or correctly parse %s)", fn);
		}
		numa_bitmask_setall(mask);
		err = -1;
	}
	if (f)
		fclose(f);

	if (line && (numa_parse_bitmap_v2(line, mask) < 0)) {
		numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
		numa_bitmask_setall(mask);
		err = -1;
	}

	free(line);
	copy_bitmask_to_bitmask(mask, buffer);

	/* slightly racy, see above */
	/* save the mask we created */
	if (node_cpu_mask_v2[node]) {
		/* refresh the existing cache entry in place; it is never replaced */
		if (update)
			copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]);
		numa_bitmask_free(mask);
	} else {
		/* we don't want to cache faulty result */
		if (!err)
			node_cpu_mask_v2[node] = mask;
		else
			numa_bitmask_free(mask);
	}
	return err;
}
make_internal_alias(numa_node_to_cpus_v1);
make_internal_alias(numa_node_to_cpus_v2);
/*
 * Mark both node-to-cpu caches (v1 and v2) as stale, so that the next
 * numa_node_to_cpus_v2() lookup re-reads sysfs instead of using its cached
 * mask.
 */
void numa_node_to_cpu_update(void)
{
	const char stale = 1;

	__atomic_store_n(&node_cpu_mask_v1_stale, stale, __ATOMIC_RELAXED);
	__atomic_store_n(&node_cpu_mask_v2_stale, stale, __ATOMIC_RELAXED);
}
/*
 * Report the node of the specified cpu.
 *
 * Walks the nodes 0 .. numa_max_node() and returns the first one whose cpu
 * mask contains "cpu".  Nodes whose cpu mask cannot be obtained are skipped.
 *
 * Returns the node number, or -1 with errno = EINVAL when "cpu" is out of
 * range or is not found on any node.
 */
int numa_node_of_cpu(int cpu)
{
	struct bitmask *bmp;
	int ncpus, nnodes, node, ret;

	ncpus = numa_num_possible_cpus();
	/*
	 * The lookup mask below is allocated with ncpus bits, so valid cpu
	 * numbers are 0 .. ncpus-1; anything else can never be found.
	 */
	if (cpu < 0 || cpu >= ncpus) {
		errno = EINVAL;
		return -1;
	}
	bmp = numa_bitmask_alloc(ncpus);
	nnodes = numa_max_node();
	ret = -1;
	for (node = 0; node <= nnodes; node++) {
		if (numa_node_to_cpus_v2_int(node, bmp) < 0) {
			/* It's possible for the node to not exist */
			continue;
		}
		if (numa_bitmask_isbitset(bmp, cpu)) {
			ret = node;
			break;
		}
	}
	numa_bitmask_free(bmp);
	/* set errno last so that the cleanup above cannot disturb it */
	if (ret < 0)
		errno = EINVAL;
	return ret;
}
/*
 * numa_run_on_node_mask_v1() - libnuma_1.1 compatibility entry point.
 *
 * Builds the union of the cpu masks of every node set in "mask" and hands it
 * to numa_sched_setaffinity_v1() with pid argument 0.  Nodes whose cpu mask
 * cannot be read are warned about and skipped.
 *
 * If the affinity call fails with EINVAL, it is retried with progressively
 * larger (zero-padded) mask sizes up to CPU_BUFFER_SIZE.  The size that is
 * tried is kept in a function-static variable, so later calls start from the
 * last size reached.
 *
 * Returns the result of the last affinity call, or -1 with errno = ENOMEM
 * when the retry buffer cannot be allocated.
 */
SYMVER("numa_run_on_node_mask_v1", "numa_run_on_node_mask@libnuma_1.1")
int
numa_run_on_node_mask_v1(const nodemask_t *mask)
{
int ncpus = numa_num_possible_cpus();
int i, k, err;
/* cpus accumulates the union; nodecpus receives one node's cpus at a time */
unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)];
memset(cpus, 0, CPU_BYTES(ncpus));
for (i = 0; i < NUMA_NUM_NODES; i++) {
/* skip quickly while the whole word holding bit i is empty */
if (mask->n[i / BITS_PER_LONG] == 0)
continue;
if (nodemask_isset_compat(mask, i)) {
if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) {
numa_warn(W_noderunmask,
"Cannot read node cpumask from sysfs");
continue;
}
/* merge this node's cpus into the result */
for (k = 0; k < CPU_LONGS(ncpus); k++)
cpus[k] |= nodecpus[k];
}
}
err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus);
/* The sched_setaffinity API is broken because it expects
the user to guess the kernel cpuset size. Do this in a
brute force way. */
if (err < 0 && errno == EINVAL) {
int savederrno = errno;
char *bigbuf;
/* static: the size reached is remembered across calls */
static int size = -1;
if (size == -1)
size = CPU_BYTES(ncpus) * 2;
bigbuf = malloc(CPU_BUFFER_SIZE);
if (!bigbuf) {
errno = ENOMEM;
return -1;
}
/* malloc() may have touched errno; restore the EINVAL from above */
errno = savederrno;
while (size <= CPU_BUFFER_SIZE) {
/* the real mask first, everything after it zeroed */
memcpy(bigbuf, cpus, CPU_BYTES(ncpus));
memset(bigbuf + CPU_BYTES(ncpus), 0,
CPU_BUFFER_SIZE - CPU_BYTES(ncpus));
err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf);
/* stop on success or on any error other than "wrong size" */
if (err == 0 || errno != EINVAL)
break;
size *= 2;
}
/* keep the errno of the last attempt across free() */
savederrno = errno;
free(bigbuf);
errno = savederrno;
}
return err;
}
/*
 * Given a node mask (probably populated from a user argument list), collect
 * the cpus of every requested node into one cpu mask and set the affinity
 * for pid argument 0 to that mask.
 *
 * Only nodes that are also set in numa_all_nodes_ptr (which is cpuset aware)
 * are used; other requested nodes produce a warning and are skipped, as are
 * nodes whose cpu mask cannot be read.
 *
 * Returns the result of numa_sched_setaffinity_v2_int(); a negative result
 * is additionally reported through numa_error().
 */
SYMVER("numa_run_on_node_mask_v2", "numa_run_on_node_mask@@libnuma_1.2")
int
numa_run_on_node_mask_v2(struct bitmask *bmp)
{
	struct bitmask *allcpus = numa_allocate_cpumask();
	int nbits = allcpus->size;
	struct bitmask *onenode = numa_allocate_cpumask();
	int node, word, rc;

	for (node = 0; node < bmp->size; node++) {
		/* nothing set in this whole word of the request */
		if (bmp->maskp[node / BITS_PER_LONG] == 0)
			continue;
		if (!numa_bitmask_isbitset(bmp, node))
			continue;

		/*
		 * numa_all_nodes_ptr is cpuset aware; use only
		 * these nodes
		 */
		if (!numa_bitmask_isbitset(numa_all_nodes_ptr, node)) {
			numa_warn(W_noderunmask,
				  "node %d not allowed", node);
			continue;
		}
		if (numa_node_to_cpus_v2_int(node, onenode) < 0) {
			numa_warn(W_noderunmask,
				  "Cannot read node cpumask from sysfs");
			continue;
		}
		/* fold this node's cpus into the accumulated mask */
		for (word = 0; word < CPU_LONGS(nbits); word++)
			allcpus->maskp[word] |= onenode->maskp[word];
	}

	rc = numa_sched_setaffinity_v2_int(0, allcpus);

	numa_bitmask_free(allcpus);
	numa_bitmask_free(onenode);

	/* used to have to consider that this could fail - it shouldn't now */
	if (rc < 0)
		numa_error("numa_sched_setaffinity_v2_int() failed");

	return rc;
}
make_internal_alias(numa_run_on_node_mask_v2);
/*
 * Like numa_run_on_node_mask(), but without any cpuset awareness: a
 * requested node is accepted when it is set in numa_possible_nodes_ptr.
 * The cpus of all accepted nodes are merged into one mask, which is then
 * used as the affinity for pid argument 0.
 *
 * Returns the result of numa_sched_setaffinity_v2_int(); a negative result
 * is additionally reported through numa_error().
 */
int
numa_run_on_node_mask_all(struct bitmask *bmp)
{
	struct bitmask *merged = numa_allocate_cpumask();
	int nbits = merged->size;
	struct bitmask *current = numa_allocate_cpumask();
	int node, word, rc;

	for (node = 0; node < bmp->size; node++) {
		/* nothing set in this whole word of the request */
		if (bmp->maskp[node / BITS_PER_LONG] == 0)
			continue;
		if (!numa_bitmask_isbitset(bmp, node))
			continue;

		if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, node)) {
			numa_warn(W_noderunmask,
				  "node %d not allowed", node);
			continue;
		}
		if (numa_node_to_cpus_v2_int(node, current) < 0) {
			numa_warn(W_noderunmask,
				  "Cannot read node cpumask from sysfs");
			continue;
		}
		/* fold this node's cpus into the accumulated mask */
		for (word = 0; word < CPU_LONGS(nbits); word++)
			merged->maskp[word] |= current->maskp[word];
	}

	rc = numa_sched_setaffinity_v2_int(0, merged);

	numa_bitmask_free(merged);
	numa_bitmask_free(current);

	/* With possible nodes freedom it can happen easily now */
	if (rc < 0)
		numa_error("numa_sched_setaffinity_v2_int() failed");

	return rc;
}
/*
 * numa_get_run_node_mask_v1() - libnuma_1.1 compatibility entry point.
 *
 * Returns, as a nodemask_t, every node 0 .. numa_max_node_int() that has at
 * least one cpu in common with the affinity mask obtained for pid argument
 * 0.  Nodes whose cpu mask cannot be read are skipped.  If the affinity mask
 * itself cannot be obtained, numa_no_nodes is returned.
 */
SYMVER("numa_get_run_node_mask_v1", "numa_get_run_node_mask@libnuma_1.1")
nodemask_t
numa_get_run_node_mask_v1(void)
{
	int ncpus = numa_num_configured_cpus();
	int max = numa_max_node_int();
	struct bitmask *affinity = numa_allocate_cpumask();
	nodemask_t result;

	if (numa_sched_getaffinity_v2_int(0, affinity) < 0) {
		result = numa_no_nodes;
	} else {
		struct bitmask *nodecpus = numa_allocate_cpumask();
		/* the size of a nodemask_t */
		struct bitmask *found = allocate_nodemask_v1();
		int node, word;

		for (node = 0; node <= max; node++) {
			if (numa_node_to_cpus_v2_int(node, nodecpus) < 0) {
				/* It's possible for the node to not exist */
				continue;
			}
			/* any overlap with our affinity selects the node */
			for (word = 0; word < CPU_LONGS(ncpus); word++) {
				if (nodecpus->maskp[word] & affinity->maskp[word])
					numa_bitmask_setbit(found, node);
			}
		}
		copy_bitmask_to_nodemask(found, &result);
		numa_bitmask_free(found);
		numa_bitmask_free(nodecpus);
	}

	numa_bitmask_free(affinity);
	return result;
}
/*
 * numa_get_run_node_mask_v2() - return a newly allocated bitmask with one
 * bit set for every node that (a) is set in numa_all_nodes_ptr and (b) has
 * at least one cpu in common with the affinity mask obtained for pid
 * argument 0.  Nodes whose cpu mask cannot be read are skipped.  If the
 * affinity mask cannot be obtained, the result is a copy of
 * numa_no_nodes_ptr.
 *
 * The caller owns the returned bitmask.
 *
 * NOTE(review): the result holds node bits but is allocated with
 * numa_allocate_cpumask(), so its size is that of a cpu mask — confirm
 * whether callers rely on this before changing it.
 */
SYMVER("numa_get_run_node_mask_v2", "numa_get_run_node_mask@@libnuma_1.2")
struct bitmask *
numa_get_run_node_mask_v2(void)
{
	int ncpus = numa_num_configured_cpus();
	int max = numa_max_node_int();
	struct bitmask *result = numa_allocate_cpumask();
	struct bitmask *affinity = numa_allocate_cpumask();

	if (numa_sched_getaffinity_v2_int(0, affinity) < 0) {
		copy_bitmask_to_bitmask(numa_no_nodes_ptr, result);
	} else {
		struct bitmask *nodecpus = numa_allocate_cpumask();
		int node, word;

		for (node = 0; node <= max; node++) {
			/*
			 * numa_all_nodes_ptr is cpuset aware; show only
			 * these nodes
			 */
			if (!numa_bitmask_isbitset(numa_all_nodes_ptr, node))
				continue;
			if (numa_node_to_cpus_v2_int(node, nodecpus) < 0) {
				/* It's possible for the node to not exist */
				continue;
			}
			/* any overlap with our affinity selects the node */
			for (word = 0; word < CPU_LONGS(ncpus); word++) {
				if (nodecpus->maskp[word] & affinity->maskp[word])
					numa_bitmask_setbit(result, node);
			}
		}
		numa_bitmask_free(nodecpus);
	}

	numa_bitmask_free(affinity);
	return result;
}
/*
 * Thin wrapper around migrate_pages(): passes the raw words of "fromnodes"
 * and "tonodes" together with a maxnode value of
 * numa_num_possible_nodes() + 1, and returns whatever migrate_pages()
 * returns.
 */
int
numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes)
{
	const int maxnode = numa_num_possible_nodes() + 1;

	return migrate_pages(pid, maxnode, fromnodes->maskp, tonodes->maskp);
}
/*
 * Thin wrapper: forwards all arguments unchanged to move_pages() and
 * returns its result.
 */
int numa_move_pages(int pid, unsigned long count,
	void **pages, const int *nodes, int *status, int flags)
{
	int rc;

	rc = move_pages(pid, count, pages, nodes, status, flags);
	return rc;
}
/*
 * Run on the cpus of a single node.
 *
 * node: node number, or -1 to allow every cpu (all bits of the cpu mask are
 *       set in that case).
 *
 * Returns the result of numa_sched_setaffinity_v2_int() for pid argument 0.
 * Returns -1 with errno = EINVAL when "node" is smaller than -1 or not
 * below numa_num_possible_nodes(), and -1 when the node's cpu mask cannot
 * be read.
 */
int numa_run_on_node(int node)
{
	int numa_num_nodes = numa_num_possible_nodes();
	int ret = -1;
	struct bitmask *cpus;

	/*
	 * -1 is the only meaningful negative value; any other negative node
	 * would be used as an array index by the per-node lookup below.
	 */
	if (node < -1 || node >= numa_num_nodes) {
		errno = EINVAL;
		goto out;
	}

	cpus = numa_allocate_cpumask();

	if (node == -1)
		numa_bitmask_setall(cpus);
	else if (numa_node_to_cpus_v2_int(node, cpus) < 0) {
		numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs");
		goto free;
	}

	ret = numa_sched_setaffinity_v2_int(0, cpus);

free:
	numa_bitmask_free(cpus);

out:
	return ret;
}
/*
 * Return the preferred node of the current policy as reported by getpol().
 *
 * For MPOL_PREFERRED and MPOL_BIND this is the lowest node set in the
 * policy's node mask, searched below numa_num_possible_nodes().  In every
 * other case (and when no bit is set) node 0 is returned.
 */
int numa_preferred(void)
{
	struct bitmask *nodes = numa_allocate_nodemask();
	int policy;
	/* could read the current CPU from /proc/self/status. Probably
	   not worth it. */
	int chosen = 0; /* or random one? */

	getpol(&policy, nodes);
	if (policy == MPOL_PREFERRED || policy == MPOL_BIND) {
		int limit = numa_num_possible_nodes();
		int n;

		for (n = 0; n < limit; n++) {
			if (numa_bitmask_isbitset(nodes, n)) {
				chosen = n;
				break;
			}
		}
	}

	numa_bitmask_free(nodes);
	return chosen;
}
/*
 * Set the preferred node.
 *
 * node >= 0 : install MPOL_PREFERRED with only that node's bit requested
 * node <  0 : install MPOL_DEFAULT with an empty node mask
 */
void numa_set_preferred(int node)
{
	struct bitmask *nodes = numa_allocate_nodemask();
	int policy = MPOL_DEFAULT;

	if (node >= 0) {
		numa_bitmask_setbit(nodes, node);
		policy = MPOL_PREFERRED;
	}
	setpol(policy, nodes);
	numa_bitmask_free(nodes);
}
/*
 * Install MPOL_DEFAULT with the empty node set (numa_no_nodes_ptr).
 */
void numa_set_localalloc(void)
{
	struct bitmask *none = numa_no_nodes_ptr;

	setpol(MPOL_DEFAULT, none);
}
/*
 * numa_bind_v1() - libnuma_1.1 compatibility entry point.
 *
 * Wraps the caller's nodemask_t in a temporary struct bitmask (no copy is
 * made) and applies it both as the run-on-node mask and as the memory bind
 * mask through the v2 implementations.
 */
SYMVER("numa_bind_v1", "numa_bind@libnuma_1.1")
void numa_bind_v1(const nodemask_t *nodemask)
{
	struct bitmask bitmask;

	bitmask.maskp = (unsigned long *)nodemask;
	/*
	 * struct bitmask.size counts bits, not bytes (this file sets it to
	 * "bytes * 8" when wrapping the v1 cpu buffer), so convert the size of
	 * nodemask_t accordingly; otherwise only the first sizeof(nodemask_t)
	 * nodes of the mask would be honoured.
	 */
	bitmask.size  = sizeof(nodemask_t) * 8;
	numa_run_on_node_mask_v2_int(&bitmask);
	numa_set_membind_v2_int(&bitmask);
}
/*
 * numa_bind_v2() - apply the same node mask first as the run-on-node mask
 * and then as the memory bind mask.
 */
SYMVER("numa_bind_v2", "numa_bind@@libnuma_1.2")
void numa_bind_v2(struct bitmask *bmp)
{
	struct bitmask *nodes = bmp;

	numa_run_on_node_mask_v2_int(nodes);
	numa_set_membind_v2_int(nodes);
}
/*
 * Turn the MPOL_MF_STRICT bit of the file-wide mbind_flags on (flag != 0)
 * or off (flag == 0).
 */
void numa_set_strict(int flag)
{
	if (!flag) {
		mbind_flags &= ~MPOL_MF_STRICT;
		return;
	}
	mbind_flags |= MPOL_MF_STRICT;
}
/*
 * Extract a node or processor number from the given string.
 *
 * s:        text to parse with strtoul() (base auto-detected)
 * end:      receives the first unparsed character; *end == s means that
 *           nothing could be parsed
 * bmp:      allowed set, only consulted when "relative" is nonzero
 * relative: if nonzero, the parsed number N is taken as "the N-th set bit
 *           of bmp" (counting from 0) and the position of that bit is
 *           returned.  If bmp has too few bits set, the returned position
 *           lies outside bmp.
 */
static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative)
{
	unsigned long parsed = strtoul(s, end, 0);
	long remaining, pos;

	if (!relative)
		return parsed;

	remaining = parsed;
	if (s == *end)
		return remaining;

	/* Find the nth set bit */
	pos = 0;
	while (remaining >= 0 && pos <= bmp->size) {
		if (numa_bitmask_isbitset(bmp, pos))
			remaining--;
		pos++;
	}
	return pos - 1;
}
/*
 * __numa_parse_nodestring() is called to create a node mask, given
 * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10.
 * (the + indicates that the numbers are nodeset-relative)
 * A leading ! requests the inverse of the described set, and the word
 * "all" selects every node of "allowed_nodes_ptr".
 *
 * The nodes may be specified as absolute, or relative to the current nodeset.
 * The list of available nodes is in a map pointed to by "allowed_nodes_ptr",
 * which may represent all nodes or the nodes in the current nodeset.
 *
 * Returns NULL (after a warning where applicable) when the string cannot be
 * parsed or names a node outside "allowed_nodes_ptr".  An empty string
 * yields a copy of numa_no_nodes_ptr.
 *
 * The caller must free the returned bitmask.
 */
static struct bitmask *
__numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr)
{
	int invert = 0, relative = 0;
	int conf_nodes = numa_num_configured_nodes();
	char *end;
	struct bitmask *mask;

	mask = numa_allocate_nodemask();

	if (s[0] == 0){
		copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask);
		return mask; /* return freeable mask */
	}
	if (*s == '!') {
		invert = 1;
		s++;
	}
	if (*s == '+') {
		relative++;
		s++;
	}
	do {
		unsigned long arg;
		int i;
		if (isalpha(*s)) {
			int n;
			if (!strcmp(s,"all")) {
				copy_bitmask_to_bitmask(allowed_nodes_ptr,
							mask);
				/* step past "all" and its NUL for the s[-1] check */
				s+=4;
				break;
			}
			/*
			 * Anything else alphabetic is offered to
			 * resolve_affinity(); NO_IO_AFFINITY means "not mine",
			 * a negative value means failure.
			 */
			n = resolve_affinity(s, mask);
			if (n != NO_IO_AFFINITY) {
				if (n < 0)
					goto err;
				/* consumed the whole rest of the string */
				s += strlen(s) + 1;
				break;
			}
		}
		arg = get_nr(s, &end, allowed_nodes_ptr, relative);
		if (end == s) {
			numa_warn(W_nodeparse, "unparseable node description `%s'\n", s);
			goto err;
		}
		if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) {
			/* arg is unsigned long: print it with %lu, not %d */
			numa_warn(W_nodeparse, "node argument %lu is out of range\n", arg);
			goto err;
		}
		i = arg;
		numa_bitmask_setbit(mask, i);
		s = end;
		if (*s == '-') {
			/* a range "first-last": validate and add the upper end */
			char *end2;
			unsigned long arg2;
			arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative);
			if (end2 == s) {
				numa_warn(W_nodeparse, "missing node argument %s\n", s);
				goto err;
			}
			if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) {
				numa_warn(W_nodeparse, "node argument %lu out of range\n", arg2);
				goto err;
			}
			/* only allowed nodes inside the range are selected */
			while (arg <= arg2) {
				i = arg;
				if (numa_bitmask_isbitset(allowed_nodes_ptr,i))
					numa_bitmask_setbit(mask, i);
				arg++;
			}
			s = end2;
		}
	} while (*s++ == ',');
	/* s was advanced one past the terminator: it must have been the NUL */
	if (s[-1] != '\0')
		goto err;
	if (invert) {
		int i;
		/* flip the bits of the first conf_nodes nodes */
		for (i = 0; i < conf_nodes; i++) {
			if (numa_bitmask_isbitset(mask, i))
				numa_bitmask_clearbit(mask, i);
			else
				numa_bitmask_setbit(mask, i);
		}
	}
	return mask;

err:
	numa_bitmask_free(mask);
	return NULL;
}
/*
 * numa_parse_nodestring() parses "s" into a node bitmask, accepting only
 * nodes that are set in numa_all_nodes_ptr (the nodes available to this
 * task).  See __numa_parse_nodestring() for the syntax and return value.
 */
struct bitmask * numa_parse_nodestring(const char *s)
{
	struct bitmask *allowed = numa_all_nodes_ptr;

	return __numa_parse_nodestring(s, allowed);
}
/*
 * numa_parse_nodestring_all() parses "s" into a node bitmask, accepting
 * every node that is set in numa_possible_nodes_ptr.  See
 * __numa_parse_nodestring() for the syntax and return value.
 */
struct bitmask * numa_parse_nodestring_all(const char *s)
{
	struct bitmask *allowed = numa_possible_nodes_ptr;

	return __numa_parse_nodestring(s, allowed);
}
/*
 * __numa_parse_cpustring() is called to create a bitmask, given
 * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10.
 * (the + indicates that the numbers are cpuset-relative)
 * A leading ! requests the inverse of the described set, and the word
 * "all" selects every cpu of "allowed_cpus_ptr".
 *
 * The cpus may be specified as absolute, or relative to the current cpuset.
 * The list of available cpus for this task is in the map pointed to by
 * "allowed_cpus_ptr", which may represent all cpus or the cpus in the
 * current cpuset.
 *
 * Returns NULL (after a warning where applicable) when the string cannot be
 * parsed or names a cpu outside "allowed_cpus_ptr".  An empty string yields
 * the freshly allocated mask unchanged.
 *
 * The caller must free the returned bitmask.
 */
static struct bitmask *
__numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr)
{
int invert = 0, relative=0;
int conf_cpus = numa_num_configured_cpus();
char *end;
struct bitmask *mask;
mask = numa_allocate_cpumask();
if (s[0] == 0)
return mask;
if (*s == '!') {
invert = 1;
s++;
}
if (*s == '+') {
relative++;
s++;
}
/* one iteration per comma-separated item: "N" or "N-M" */
do {
unsigned long arg;
int i;
if (!strcmp(s,"all")) {
copy_bitmask_to_bitmask(allowed_cpus_ptr, mask);
/* step past "all" and its NUL so that the s[-1] check below passes */
s+=4;
break;
}
arg = get_nr(s, &end, allowed_cpus_ptr, relative);
if (end == s) {
numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s);
goto err;
}
if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) {
numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s);
goto err;
}
i = arg;
numa_bitmask_setbit(mask, i);
s = end;
if (*s == '-') {
/* a range "first-last": validate and add the upper end */
char *end2;
unsigned long arg2;
/* NOTE(review): this declaration shadows the outer "i" */
int i;
arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative);
if (end2 == s) {
numa_warn(W_cpuparse, "missing cpu argument %s\n", s);
goto err;
}
if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) {
numa_warn(W_cpuparse, "cpu argument %s out of range\n", s);
goto err;
}
/* only allowed cpus inside the range are selected */
while (arg <= arg2) {
i = arg;
if (numa_bitmask_isbitset(allowed_cpus_ptr, i))
numa_bitmask_setbit(mask, i);
arg++;
}
s = end2;
}
} while (*s++ == ',');
/* s was advanced one past the terminator: it must have been the NUL */
if (s[-1] != '\0')
goto err;
if (invert) {
int i;
/* flip the bits of the first conf_cpus cpus */
for (i = 0; i < conf_cpus; i++) {
if (numa_bitmask_isbitset(mask, i))
numa_bitmask_clearbit(mask, i);
else
numa_bitmask_setbit(mask, i);
}
}
return mask;
err:
numa_bitmask_free(mask);
return NULL;
}
/*
 * numa_parse_cpustring() parses "s" into a cpu bitmask, accepting only cpus
 * that are set in numa_all_cpus_ptr (the cpus available to this task).  See
 * __numa_parse_cpustring() for the syntax and return value.
 */
struct bitmask * numa_parse_cpustring(const char *s)
{
	struct bitmask *allowed = numa_all_cpus_ptr;

	return __numa_parse_cpustring(s, allowed);
}
/*
 * numa_parse_cpustring_all() parses "s" into a cpu bitmask, accepting every
 * cpu that is set in numa_possible_cpus_ptr.  See __numa_parse_cpustring()
 * for the syntax and return value.
 */
struct bitmask * numa_parse_cpustring_all(const char *s)
{
	struct bitmask *allowed = numa_possible_cpus_ptr;

	return __numa_parse_cpustring(s, allowed);
}
0707010000000F000041ED00003EA600000032000000026127204300000000000000000000000000000000000000000000001E00000000numactl-2.0.14.20.g4ee5e0c/m407070100000010000081A400003EA6000000320000000161272043000014DA000000000000000000000000000000000000003400000000numactl-2.0.14.20.g4ee5e0c/m4/ax_am_override_var.m4# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_am_override_var.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_AM_OVERRIDE_VAR([varname1 varname ... ])
# AX_AM_OVERRIDE_FINALIZE
#
# DESCRIPTION
#
# This autoconf macro generalizes the approach given in
# <http://lists.gnu.org/archive/html/automake/2005-09/msg00108.html> which
# moves user specified values for variable 'varname' given at configure
# time into the corresponding AM_${varname} variable and clears out
# 'varname', allowing further manipulation by the configure script so that
# target specific variables can be given specialized versions. 'varname'
# may still be specified on the make command line and will be appended as
# usual.
#
# As an example usage, consider a project which might benefit from
# different compiler flags for different components. Typically this is
# done via target specific flags, e.g.
#
# libgtest_la_CXXFLAGS = \
# -I $(top_srcdir)/tests \
# -I $(top_builddir)/tests \
# $(GTEST_CXXFLAGS)
#
# automake will automatically append $(CXXFLAGS) -- provided by the user
# -- to the build rule for libgtest_la. That might be problematic, as
# CXXFLAGS may contain compiler options which are inappropriate for
# libgtest_la.
#
# The approach laid out in the referenced mailing list message is to
# supply a base value for a variable during _configure_ time, during which
# it is possible to amend it for specific targets. The user may
# subsequently specify a value for the variable during _build_ time, which
# make will apply (via the standard automake rules) to all appropriate
# targets.
#
# For example,
#
# AX_AM_OVERRIDE_VAR([CXXFLAGS])
#
# will store the value of CXXFLAGS specified at configure time into the
# AM_CXXFLAGS variable, AC_SUBST it, and clear CXXFLAGS. configure may
# then create a target specific set of flags based upon AM_CXXFLAGS, e.g.
#
# # googletest uses variadic macros, which g++ -pedantic-errors
# # is very unhappy about
# AC_SUBST([GTEST_CXXFLAGS],
# [`AS_ECHO_N(["$AM_CXXFLAGS"]) \
# | sed s/-pedantic-errors/-pedantic/`
# ]
# )
#
# which would be used in a Makefile.am as above. Since CXXFLAGS is
# cleared, the configure time value will not affect the build for
# libgtest_la.
#
# Prior to _any other command_ which may set ${varname}, call
#
# AX_AM_OVERRIDE_VAR([varname])
#
# This will preserve the value (if any) passed to configure in
# AM_${varname} and AC_SUBST([AM_${varname}]). You may pass a space
# separated list of variable names, or may call AX_AM_OVERRIDE_VAR
# multiple times for the same effect.
#
# If any subsequent configure commands set ${varname} and you wish to
# capture the resultant value into AM_${varname} in the case where
# ${varname} was _not_ provided at configure time, call
#
# AX_AM_OVERRIDE_FINALIZE
#
# after _all_ commands which might affect any of the variables specified
# in calls to AX_AM_OVERRIDE_VAR. This need be done only once, but
# repeated calls will not cause harm.
#
# There is a bit of trickery required to allow further manipulation of the
# AM_${varname} in a Makefile.am file. If AM_CFLAGS is used as is in a
# Makefile.am, e.g.
#
# libfoo_la_CFLAGS = $(AM_CFLAGS)
#
# then automake will emit code in Makefile.in which sets AM_CFLAGS from
# the configure'd value.
#
# If however, AM_CFLAGS is manipulated (i.e. appended to), you will have
# to explicitly arrange for the configure'd value to be substituted:
#
# AM_CFLAGS = @AM_CFLAGS@
# AM_CFLAGS += -lfoo
#
# or else automake will complain about using += before =.
#
# LICENSE
#
# Copyright (c) 2013 Smithsonian Astrophysical Observatory
# Copyright (c) 2013 Diab Jerius <djerius@cfa.harvard.edu>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 1
# _AX_AM_OVERRIDE_INITIALIZE
# Internal helper: defines the m4 macro _mst_am_override_vars as an empty
# list. _AX_AM_OVERRIDE_VAR appends variable names to this list.
AC_DEFUN([_AX_AM_OVERRIDE_INITIALIZE],
[
m4_define([_mst_am_override_vars],[])
])
# _AX_AM_OVERRIDE_VAR(varname)
# Internal helper, handles a single variable name:
# - appends varname to the m4 list _mst_am_override_vars;
# - sets the shell flag _mst_am_override_<varname>_set to "false";
# - if the shell variable <varname> is set at this point, AC_SUBSTs
#   AM_<varname> with its value, empties <varname> and changes the flag
#   to ":".
AC_DEFUN([_AX_AM_OVERRIDE_VAR],
[
m4_define([_mst_am_override_vars], m4_defn([_mst_am_override_vars]) $1 )
_mst_am_override_$1_set=false
AS_IF( [test "${$1+set}" = set],
[AC_SUBST([AM_$1],["$$1"])
$1=
_mst_am_override_$1_set=:
]
)
]) # _AX_AM_OVERRIDE_VAR
# _AX_AM_OVERRIDE_FINALIZE(varname)
# Internal helper, handles a single variable name:
# - if the flag _mst_am_override_<varname>_set is ":" (the value was already
#   captured by _AX_AM_OVERRIDE_VAR), nothing is captured again;
# - otherwise AM_<varname> is AC_SUBSTed with the current value of
#   <varname>, and both <varname> and the flag are emptied;
# - in either case <varname> itself is AC_SUBSTed.
AC_DEFUN([_AX_AM_OVERRIDE_FINALIZE],
[
AS_IF([$_mst_am_override_$1_set = :],
[],
[AC_SUBST([AM_$1],["$$1"])
$1=
_mst_am_override_$1_set=
]
)
AC_SUBST($1)
]) # _AX_AM_OVERRIDE_FINALIZE
# AX_AM_OVERRIDE_VAR([varname1 varname2 ...])
# Public entry point: makes sure the variable list is initialized, then
# applies _AX_AM_OVERRIDE_VAR to each whitespace-separated word of the
# argument.
AC_DEFUN([AX_AM_OVERRIDE_VAR],
[
AC_REQUIRE([_AX_AM_OVERRIDE_INITIALIZE])
m4_map_args_w([$1],[_AX_AM_OVERRIDE_VAR(],[)])
])# AX_AM_OVERRIDE_VAR
# AX_AM_OVERRIDE_FINALIZE
# Public entry point: makes sure the variable list is initialized, then
# applies _AX_AM_OVERRIDE_FINALIZE to every name collected in
# _mst_am_override_vars by earlier AX_AM_OVERRIDE_VAR calls.
AC_DEFUN([AX_AM_OVERRIDE_FINALIZE],
[
AC_REQUIRE([_AX_AM_OVERRIDE_INITIALIZE])
m4_map_args_w(_mst_am_override_vars,[_AX_AM_OVERRIDE_FINALIZE(],[)])
]) # AX_AM_OVERRIDE_FINALIZE
07070100000011000081A400003EA600000032000000016127204300000D09000000000000000000000000000000000000003700000000numactl-2.0.14.20.g4ee5e0c/m4/ax_check_compile_flag.m4# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
#
# DESCRIPTION
#
# Check whether the given FLAG works with the current language's compiler
# or gives an error. (Warnings, however, are ignored)
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
# force the compiler to issue an error when a bad flag is given.
#
# INPUT gives an alternative input source to AC_COMPILE_IFELSE.
#
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 3
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
# The result is cached in a variable whose name is built from the language
# abbreviation, EXTRA-FLAGS and FLAG. While the test program is compiled the
# language's FLAGS variable is temporarily extended by "EXTRA-FLAGS FLAG";
# the saved value is restored afterwards.
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
[m4_default([$2], :)],
[m4_default([$3], :)])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAG
07070100000012000081A400003EA600000032000000016127204300000BE3000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/m4/ax_tls.m4# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_tls.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_TLS([action-if-found], [action-if-not-found])
#
# DESCRIPTION
#
# Provides a test for the compiler support of thread local storage (TLS)
# extensions. Defines TLS if it is found. Currently knows about GCC/ICC
# and MSVC. I think SunPro uses the same as GCC, and Borland apparently
# supports either.
#
# LICENSE
#
# Copyright (c) 2008 Alan Woodland <ajw05@aber.ac.uk>
# Copyright (c) 2010 Diego Elio Petteno` <flameeyes@gmail.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 10
# AX_TLS([action-if-found], [action-if-not-found])
# Tries the storage-class keywords "__thread" and "__declspec(thread)" in
# that order by compiling a small function with a static variable declared
# with the keyword. The first keyword that compiles is cached in ac_cv_tls;
# if none does, ac_cv_tls is "none". When a keyword was found, TLS is
# defined to it and action-if-found is run; otherwise action-if-not-found
# is run.
AC_DEFUN([AX_TLS], [
AC_MSG_CHECKING(for thread local storage (TLS) class)
AC_CACHE_VAL(ac_cv_tls, [
ax_tls_keywords="__thread __declspec(thread) none"
for ax_tls_keyword in $ax_tls_keywords; do
AS_CASE([$ax_tls_keyword],
[none], [ac_cv_tls=none ; break],
[AC_TRY_COMPILE(
[#include <stdlib.h>
static void
foo(void) {
static ] $ax_tls_keyword [ int bar;
exit(1);
}],
[],
[ac_cv_tls=$ax_tls_keyword ; break],
ac_cv_tls=none
)])
done
])
AC_MSG_RESULT($ac_cv_tls)
AS_IF([test "$ac_cv_tls" != "none"],
AC_DEFINE_UNQUOTED([TLS], $ac_cv_tls, [If the compiler supports a TLS storage class define it to that here])
m4_ifnblank([$1], [$1]),
m4_ifnblank([$2], [$2])
)
])
07070100000013000081A400003EA600000032000000016127204300000088000000000000000000000000000000000000002400000000numactl-2.0.14.20.g4ee5e0c/manlinks#!/bin/sh
# Print the names of all functions listed in numa.3, one per line.
# numa.3 is read from the current directory.
# Only ".BI" lines that mention "numa" and contain an opening parenthesis are
# used, so globals (which have no parameter list) are not printed.
# The sed expression reduces each such line to the numa_* identifier that
# precedes the parenthesis.
grep '^\.BI.*numa.*(' numa.3 | sed -e 's/.*\(numa_.*\)(.*/\1/'
07070100000014000081A400003EA6000000320000000161272043000006F6000000000000000000000000000000000000002400000000numactl-2.0.14.20.g4ee5e0c/memhog.8.TH MEMHOG 8 "2003,2004" "SuSE Labs" "Linux Administrator's Manual"
.SH NAME
memhog \- Allocates memory with policy for testing
.SH SYNOPSIS
.B memhog
[
.B \-r<NUM>
] [
.B size kmg
] [
.B policy nodeset
] [
.B \-f<filename>
]
.SH DESCRIPTION
.B memhog
mmaps a memory region for a given size and sets the numa policy (if specified).
It then updates the memory region for the given number of iterations using memset.
.TS
tab(|);
l l.
-r<num>|Repeat memset NUM times
-f<file>|Open file for mmap backing
-H|Disable transparent hugepages
-size|Allocation size in bytes, may have case-insensitive order
|suffix (G=gigabyte, M=megabyte, K=kilobyte)
.TE
Supported numa-policies:
.TP
.B interleave
Memory will be allocated using round robin on nodes. When
memory cannot be allocated on the current interleave target, fall back
to other nodes. Multiple nodes may be specified.
.TP
.B membind
Only allocate memory from nodes. Allocation will fail
when there is not enough memory available on these nodes. Multiple
nodes may be specified.
.TP
.B preferred
Preferably allocate memory on node, but if memory cannot be
allocated there fall back to other nodes. This option takes only a
single node number.
.TP
.B default
Memory will be allocated on the local node (the node the
thread is running on)
.SH EXAMPLES
.TP
# Allocate a 1G region, mmap backed by memhog.mmap file, membind to node 0, repeat test 6 times
memhog -r6 1G --membind 0 -fmemhog.mmap
.TP
# Allocate a 1G region, interleave across nodes 0,1,2,3, repeat test 4 times
memhog -r4 1G --interleave 0-3
.TP
# Allocate a 1G region, (implicit) default policy, repeat test 8 times
memhog -r8 1G
.SH AUTHORS
Andi Kleen (ak@suse.de)
.SH LICENSE
GPL v2
.SH SEE ALSO
.I mmap(2), memset(3), numactl(8), numastat(8)
07070100000015000081A400003EA600000032000000016127204300000C91000000000000000000000000000000000000002400000000numactl-2.0.14.20.g4ee5e0c/memhog.c/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
Allocate memory with policy for testing.
numactl is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; version
2.
numactl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should find a copy of v2 of the GNU General Public License somewhere
on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/fcntl.h>
#include <string.h>
#include <stdbool.h>
#include "numa.h"
#include "numaif.h"
#include "util.h"
#define terr(x) perror(x)
enum {
UNIT = 10*1024*1024,
};
#ifndef MADV_NOHUGEPAGE
#define MADV_NOHUGEPAGE 15
#endif
int repeat = 1;
/*
 * Print the command line synopsis followed by the list of policies
 * (print_policies()) to stdout, then terminate with exit status 1.
 */
void usage(void)
{
	static const char *const help[] = {
		"memhog [-fFILE] [-rNUM] size[kmg] [policy [nodeset]]\n",
		"-f mmap is backed by FILE\n",
		"-rNUM repeat memset NUM times\n",
		"-H disable transparent hugepages\n",
	};
	size_t k;

	for (k = 0; k < sizeof(help) / sizeof(help[0]); k++)
		fputs(help[k], stdout);
	print_policies();
	exit(1);
}
long length;
/*
 * Fill the first "length" bytes (file-level variable) of the mapping with
 * 0xff, working in pieces of at most UNIT bytes.  A '.' is written to stdout
 * (and flushed) before each piece, and a newline once everything is done.
 */
void hog(void *map)
{
	char *base = map;
	long done = 0;

	while (done < length) {
		long chunk = length - done;

		if (chunk > UNIT)
			chunk = UNIT;
		putchar('.');
		fflush(stdout);
		memset(base + done, 0xff, chunk);
		done += UNIT;
	}
	putchar('\n');
}
/*
 * memhog [-fFILE] [-rNUM] [-H] size[kmg] [policy [nodeset]]
 *
 * Maps "size" bytes (file backed with -f, otherwise anonymous), applies the
 * requested memory policy to the mapping with mbind(), reads the policy back
 * with get_mempolicy() and finally writes to the whole mapping "repeat"
 * times.
 *
 * Exit status: 1 on usage errors and on an invalid nodeset; otherwise "ret",
 * which is set to 1 when the policy read back differs from the requested one
 * and the comparison is not disabled through "loose".
 */
int main(int ac, char **av)
{
	char *map;
	char *nodearg;
	struct bitmask *nodes, *gnodes;
	int policy, gpolicy;
	int ret = 0;
	int loose = 0;
	int i;
	int fd = -1;
	bool disable_hugepage = false;

	nodes = numa_allocate_nodemask();
	gnodes = numa_allocate_nodemask();

	/* leading options; each one is a single argument such as -r4 */
	while (av[1] && av[1][0] == '-') {
		switch (av[1][1]) {
		case 'f':
			fd = open(av[1]+2, O_RDWR);
			if (fd < 0)
				perror(av[1]+2);
			break;
		case 'r':
			repeat = atoi(av[1] + 2);
			break;
		case 'H':
			disable_hugepage = true;
			break;
		default:
			usage();
		}
		av++;
	}

	if (!av[1]) usage();

	length = memsize(av[1]);
	if (av[2] && numa_available() < 0) {
		printf("Kernel doesn't support NUMA policy\n");
	} else
		loose = 1;
	/*
	 * The argument vector ends with a NULL pointer, so av[3] may only be
	 * read when av[2] is not that terminator.
	 * NOTE(review): assumes parse_policy() ignores its second argument
	 * when the policy name is NULL — confirm in util.c.
	 */
	nodearg = av[2] ? av[3] : NULL;
	policy = parse_policy(av[2], nodearg);
	if (policy != MPOL_DEFAULT)
		nodes = numa_parse_nodestring(nodearg);
	if (!nodes) {
		printf ("<%s> is invalid\n", nodearg);
		exit(1);
	}

	if (fd >= 0)
		map = mmap(NULL,length,PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	else
		/* anonymous mappings take fd -1 for portability */
		map = mmap(NULL, length, PROT_READ|PROT_WRITE,
				   MAP_PRIVATE|MAP_ANONYMOUS,
				   -1, 0);
	if (map == MAP_FAILED)
		err("mmap");

	if (mbind(map, length, policy, nodes->maskp, nodes->size, 0) < 0)
		terr("mbind");

	if (disable_hugepage)
		madvise(map, length, MADV_NOHUGEPAGE);

	/* read the policy of the mapping back for verification */
	gpolicy = -1;
	if (get_mempolicy(&gpolicy, gnodes->maskp, gnodes->size, map, MPOL_F_ADDR) < 0)
		terr("get_mempolicy");
	if (!loose && policy != gpolicy) {
		ret = 1;
		printf("policy %d gpolicy %d\n", policy, gpolicy);
	}
	if (!loose && !numa_bitmask_equal(gnodes, nodes)) {
		printf("nodes differ %lx, %lx!\n",
			gnodes->maskp[0], nodes->maskp[0]);
		ret = 1;
	}

	for (i = 0; i < repeat; i++)
		hog(map);
	exit(ret);
}
07070100000016000081A400003EA6000000320000000161272043000007E2000000000000000000000000000000000000002A00000000numactl-2.0.14.20.g4ee5e0c/migratepages.8.\" t
.\" Copyright 2005-2006 Christoph Lameter, Silicon Graphics, Inc.
.\"
.\" based on Andi Kleen's numactl manpage
.\"
.TH MIGRATEPAGES 8 "Jan 2005" "SGI" "Linux Administrator's Manual"
.SH NAME
migratepages \- Migrate the physical location of a process's pages
.SH SYNOPSIS
.B migratepages
pid from-nodes to-nodes
.SH DESCRIPTION
.B migratepages
moves the physical location of a process's pages without any changes of the
virtual address space of the process. Moving the pages allows one to change
the distances of a process to its memory. Performance may be optimized by moving
a process's pages to the node where it is executing.
If multiple nodes are specified for from-nodes or to-nodes then
an attempt is made to preserve the relative location of
each page in each nodeset.
For example if we move from nodes 2-5 to 7,9,12-13 then the preferred mode of
operation is to move pages from 2->7, 3->9, 4->12 and 5->13. However, this
is only possible if enough memory is available.
.TP
Valid node specifiers
.TS
tab(:);
l l.
all:All nodes
number:Node number
number1{,number2}:Node number1 and Node number2
number1-number2:Nodes from number1 to number2
! nodes:Invert selection of the following specification.
.TE
.SH NOTES
Requires a NUMA policy aware kernel with support for page migration
(linux 2.6.16 and later).
migratepages will only move pages that are not shared with other
processes if called by a user without administrative privileges (but
with the right to modify the process).
migratepages will move all pages if invoked from root (or a user with
administrative privileges).
.SH FILES
.I /proc/<pid>/numa_maps
for information about the NUMA memory use of a process.
.SH COPYRIGHT
Copyright 2005-2006 Christoph Lameter, Silicon Graphics, Inc.
migratepages is under the GNU General Public License, v.2
.SH SEE ALSO
.I numactl(8)
,
.I set_mempolicy(2)
,
.I get_mempolicy(2)
,
.I mbind(2)
,
.I sched_setaffinity(2)
,
.I sched_getaffinity(2)
,
.I proc(5)
,
.I ftok(3)
,
.I shmat(2)
,
.I taskset(1)
07070100000017000081A400003EA600000032000000016127204300000899000000000000000000000000000000000000002A00000000numactl-2.0.14.20.g4ee5e0c/migratepages.c/*
* Copyright (C) 2005 Christoph Lameter, Silicon Graphics, Incorporated.
* based on Andi Kleen's numactl.c.
*
* Manual process migration
*
* migratepages is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; version 2.
*
* migratepages is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should find a copy of v2 of the GNU General Public License somewhere
* on your Linux system; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define _GNU_SOURCE
#include <getopt.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include "numa.h"
#include "numaif.h"
#include "numaint.h"
#include "util.h"
/* Long options handed to getopt_long() in main(): only --help, which takes
   no argument and is reported as 'h'. The all-zero entry ends the table. */
struct option opts[] = {
{"help", 0, 0, 'h' },
{ 0 }
};
/* Write the migratepages synopsis to stderr and exit with status 1. */
void usage(void)
{
	fputs("usage: migratepages pid from-nodes to-nodes\n", stderr);
	fputs("\n", stderr);
	fputs("nodes is a comma delimited list of node numbers or A-B ranges or all.\n",
	      stderr);
	exit(1);
}
/*
 * Make sure NUMA support is present. numa_available() is probed only on the
 * first call; when it reports failure, complain() is invoked with an
 * explanatory message. Later calls skip the probe.
 */
void checknuma(void)
{
	static int numa = -1;	/* negative until the first call has run */

	if (numa < 0 && numa_available() < 0)
		complain("This system does not support NUMA functionality");
	numa = 0;
}
/*
 * migratepages entry point: migratepages pid from-nodes to-nodes
 *
 * Any recognised or unrecognised option (including -h/--help) prints the
 * usage text. Exactly three positional arguments are required: a numeric
 * pid and two node strings. Returns 0 when numa_migrate_pages() succeeds
 * and 1 when it fails; invalid input exits with status 1.
 */
int main(int argc, char *argv[])
{
	struct bitmask *fromnodes;
	struct bitmask *tonodes;
	char *end;
	int pid;

	/* Every option getopt_long() reports ends in usage(). */
	while (getopt_long(argc, argv, "h", opts, NULL) != -1)
		usage();

	argc -= optind;
	argv += optind;
	if (argc != 3)
		usage();

	checknuma();

	/* The pid must be a complete, non-empty number. */
	pid = strtoul(argv[0], &end, 0);
	if (end == argv[0] || *end != '\0')
		usage();

	fromnodes = numa_parse_nodestring(argv[1]);
	if (fromnodes == NULL) {
		printf ("<%s> is invalid\n", argv[1]);
		exit(1);
	}

	tonodes = numa_parse_nodestring(argv[2]);
	if (tonodes == NULL) {
		printf ("<%s> is invalid\n", argv[2]);
		exit(1);
	}

	if (numa_migrate_pages(pid, fromnodes, tonodes) < 0) {
		perror("migrate_pages");
		return 1;
	}
	return 0;
}
07070100000018000081A400003EA600000032000000016127204300000320000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/migspeed.8.\" t
.\" Copyright 2005-2007 Christoph Lameter, Silicon Graphics, Inc.
.\"
.\" based on Andi Kleen's numactl manpage
.\"
.TH MIGSPEED 8 "April 2005" "SGI" "Linux Administrator's Manual"
.SH NAME
migspeed \- Test the speed of page migration
.SH SYNOPSIS
.B migspeed
-p pages from-nodes to-nodes
.SH DESCRIPTION
.B migspeed
attempts to move a sample of pages from the indicated node to the target node
and measures the time it takes to perform the move.
.B -p pages
The default sample is 1000 pages. Override that with another number.
.SH NOTES
Requires a NUMA policy aware kernel with support for page migration
(Linux 2.6.16 and later).
.SH COPYRIGHT
Copyright 2007 Christoph Lameter, Silicon Graphics, Inc.
migspeed is under the GNU General Public License, v.2
.SH SEE ALSO
.I numactl(8)
07070100000019000081A400003EA600000032000000016127204300000D37000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/migspeed.c/*
* Migration test program
*
* (C) 2007 Silicon Graphics, Inc. Christoph Lameter <clameter@sgi.com>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "numa.h"
#include "numaif.h"
#include <time.h>
#include <errno.h>
#include <malloc.h>
#include <unistd.h>
#include "util.h"
/* Test buffer allocated in main() and migrated between node sets. */
char *memory;
/* Number of pages in the test buffer; overridden by -p. */
unsigned long pages = 1000;
/* System page size, filled in from getpagesize() at the start of main(). */
unsigned long pagesize;
/* getopt() option string: -h, -v, and -p with an argument. */
const char *optstr = "hvp:";
/* Program name (argv[0]) used in the usage message. */
char *cmd;
/* Verbosity level; incremented once per -v. */
int verbose;
/* Timestamps taken immediately before and after the migrating mbind(). */
struct timespec start,end;
/*
 * Print the migspeed command line help to stdout and exit with status 1.
 *
 * `pages` is an unsigned long, so its default is printed with %lu.
 */
void usage(void)
{
	printf("usage %s [-p pages] [-h] [-v] from-nodes to-nodes\n", cmd);
	printf(" from and to nodes may be specified in form N or N-N\n");
	printf(" -p pages number of pages to try (defaults to %lu)\n",
		pages);
	printf(" -v verbose\n");
	printf(" -h usage\n");
	exit(1);
}
/*
 * Print every line of /proc/self/numa_maps that contains the text "bind".
 * Exits with status 1 when the file cannot be opened.
 */
void displaymap(void)
{
	char line[2000];
	FILE *maps = fopen("/proc/self/numa_maps","r");

	if (maps == NULL) {
		printf("/proc/self/numa_maps not accessible.\n");
		exit(1);
	}

	while (!feof(maps) && fgets(line, sizeof(line), maps) != NULL) {
		if (strstr(line, "bind") != NULL)
			fputs(line, stdout);
	}

	fclose(maps);
}
int main(int argc, char *argv[])
{
char *p;
int option;
struct timespec result;
unsigned long bytes;
double duration, mbytes;
struct bitmask *from;
struct bitmask *to;
pagesize = getpagesize();
/* Command line processing */
opterr = 1;
cmd = argv[0];
while ((option = getopt(argc, argv, optstr)) != EOF)
switch (option) {
case 'h' :
case '?' :
usage();
case 'v' :
verbose++;
break;
case 'p' :
pages = strtoul(optarg, &p, 0);
if (p == optarg || *p)
usage();
break;
}
if (!argv[optind])
usage();
if (verbose > 1)
printf("numa_max_node = %d\n", numa_max_node());
numa_exit_on_error = 1;
from = numa_parse_nodestring(argv[optind]);
if (!from) {
printf ("<%s> is invalid\n", argv[optind]);
exit(1);
}
if (errno) {
perror("from mask");
exit(1);
}
if (verbose)
printmask("From", from);
if (!argv[optind+1])
usage();
to = numa_parse_nodestring(argv[optind+1]);
if (!to) {
printf ("<%s> is invalid\n", argv[optind+1]);
exit(1);
}
if (errno) {
perror("to mask");
exit(1);
}
if (verbose)
printmask("To", to);
bytes = pages * pagesize;
if (verbose)
printf("Allocating %lu pages of %lu bytes of memory\n",
pages, pagesize);
memory = memalign(pagesize, bytes);
if (!memory) {
printf("Out of Memory\n");
exit(2);
}
if (mbind(memory, bytes, MPOL_BIND, from->maskp, from->size, 0) < 0)
numa_error("mbind");
if (verbose)
printf("Dirtying memory....\n");
for (p = memory; p <= memory + bytes; p += pagesize)
*p = 1;
if (verbose)
printf("Starting test\n");
displaymap();
clock_gettime(CLOCK_REALTIME, &start);
if (mbind(memory, bytes, MPOL_BIND, to->maskp, to->size, MPOL_MF_MOVE) <0)
numa_error("memory move");
clock_gettime(CLOCK_REALTIME, &end);
displaymap();
result.tv_sec = end.tv_sec - start.tv_sec;
result.tv_nsec = end.tv_nsec - start.tv_nsec;
if (result.tv_nsec < 0) {
result.tv_sec--;
result.tv_nsec += 1000000000;
}
if (result.tv_nsec >= 1000000000) {
result.tv_sec++;
result.tv_nsec -= 1000000000;
}
duration = result.tv_sec + result.tv_nsec / 1000000000.0;
mbytes = bytes / (1024*1024.0);
printf("%1.1f Mbyte migrated in %1.2f secs. %3.1f Mbytes/second\n",
mbytes,
duration,
mbytes / duration);
return 0;
}
0707010000001A000081A400003EA600000032000000016127204300001113000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/move_pages.2.\" Hey Emacs! This file is -*- nroff -*- source.
.\"
.\" This manpage is Copyright (C) 2006 Silicon Graphics, Inc.
.\" Christoph Lameter
.\"
.\" Permission is granted to make and distribute verbatim copies of this
.\" manual provided the copyright notice and this permission notice are
.\" preserved on all copies.
.\"
.\" Permission is granted to copy and distribute modified versions of this
.\" manual under the conditions for verbatim copying, provided that the
.\" entire resulting derived work is distributed under the terms of a
.\" permission notice identical to this one.
.\"
.TH MOVE_PAGES 2 2006-10-31 "Linux 2.6.18" "Linux Programmer's Manual"
.SH NAME
move_pages \- Move individual pages of a process to another node
.SH SYNOPSIS
.B #include <numaif.h>
.sp
.BI "long move_pages(int " pid ", unsigned long count, void ** " pages ", const int * " nodes ", int * " status ", int " flags );
.SH DESCRIPTION
.BR move_pages ()
moves
.I count
pages to the
.I nodes.
The result of the move is reflected in
.I status.
The
.I flags
indicate constraints on the pages to be moved.
.I pid
is the process id in which pages are to be moved. Sufficient rights
must exist to move pages of another process. This means the moving
process either has root privileges, has SYS_NICE administrative rights or
the same owner. If pid is 0 then we move pages of the current process.
.I count
is the number of pages to move. It defines the size of the three
arrays
.I pages,
.I nodes
and
.I status.
.I pages
is an array of pointers to the pages that should be moved. These are pointers
that should be aligned to page boundaries. Addresses are specified as seen by
the process specified by
.I pid.
.I nodes
is either an array of integers that specify the desired location for each
page or it is NULL. Each integer is a node number. If NULL is specified then
move_pages will not move any pages but return the node of each page in
the
.I status
array. Having the status of each page may be necessary to determine
pages that need to be moved.
.I status
is an array of integers that return the status of each page. The array
only contains valid values if
.I move_pages
did not return an error code.
.I flags
specify what types of pages to move.
.B MPOL_MF_MOVE
means that only pages that are in exclusive use by the process
are to be moved.
.B MPOL_MF_MOVE_ALL
means that pages shared between multiple processes can also be moved.
The process must have root privileges or SYS_NICE privileges.
.SH Page states in the status array
.TP
.B 0..MAX_NUMNODES
Indicates that the location of the page is on this node.
.TP
.B -ENOENT
The page is not present.
.TP
.B -EACCES
The page is mapped by multiple processes and can only be moved
if
.I MPOL_MF_MOVE_ALL
is specified.
.TP
.B -EBUSY
The page is currently busy and cannot be moved. Try again later.
This occurs if a page is undergoing I/O or another kernel subsystem
is holding a reference to the page.
.TP
.B -EFAULT
This is a zero page or the memory area is not mapped by the process.
.TP
.B -ENOMEM
Unable to allocate memory on target node.
.TP
.B -EIO
Unable to write back a page. The page has to be written back
in order to move it since the page is dirty and the filesystem
has not provided a migration function that would allow the move
of dirty pages.
.TP
.B -EINVAL
A dirty page cannot be moved. The filesystem does not
provide a migration function and has no ability to write back pages.
.SH "RETURN VALUE"
On success
.B move_pages
returns zero.
.SH ERRORS
.TP
.B -ENOENT
No pages were found that require moving. All pages are either already
on the target node, not present, had an invalid address or could not be
moved because they were mapped by multiple processes.
.TP
.B -EINVAL
Flags other than
.I MPOL_MF_MOVE
and
.I MPOL_MF_MOVE_ALL
were specified or an attempt was made to migrate pages of a kernel thread.
.TP
.B -EPERM
.I MPOL_MF_MOVE_ALL
specified without sufficient privileges or an attempt to move a process
belonging to another user.
.TP
.B -EACCES
One of the target nodes is not allowed by the current cpuset.
.TP
.B -ENODEV
One of the target nodes is not online.
.TP
.B -ESRCH
Process does not exist.
.TP
.B -E2BIG
Too many pages to move.
.TP
.B -EFAULT
Parameter array could not be accessed.
.SH "SEE ALSO"
.BR numa_maps (5),
.BR migratepages (8),
.BR numa_stat (8),
.BR numa (3)
0707010000001B000081A400003EA600000032000000016127204300000485000000000000000000000000000000000000002000000000numactl-2.0.14.20.g4ee5e0c/mt.c/* Mersenne twister implementation from Michael Brundage. Public Domain.
MT is a very fast pseudo random number generator. This version works
on 32bit words. Changes by AK. */
#include <stdlib.h>
#include "mt.h"
/* Read position in mt_buffer; mt_random() in mt.h advances it by
   sizeof(unsigned int) per value, i.e. it is a byte offset, not an index. */
int mt_index;
/* Generator state: MT_LEN 32-bit words, refreshed by mt_refill(). */
unsigned int mt_buffer[MT_LEN];
/*
 * Seed the generator state: fill all MT_LEN words of mt_buffer with
 * successive rand() values from the fixed seed srand(1), then rewind
 * the read position.
 */
void mt_init(void)
{
	unsigned int *slot = mt_buffer;
	unsigned int *const limit = mt_buffer + MT_LEN;

	srand(1);
	while (slot < limit)
		*slot++ = rand();
	mt_index = 0;
}
/* Offsets used by mt_refill(): word i is combined with word i + MT_IA
   (wrapping around the buffer, hence MT_IB = MT_LEN - MT_IA). */
#define MT_IA 397
#define MT_IB (MT_LEN - MT_IA)
/* Top bit / low 31 bits of a 32-bit state word. */
#define UPPER_MASK 0x80000000
#define LOWER_MASK 0x7FFFFFFF
/* Constant XORed in by MAGIC() when the low bit of s is set. */
#define MATRIX_A 0x9908B0DF
/* Top bit of b[i] joined with the low 31 bits of b[j]. The expansion is
   fully parenthesised so it stays one operand inside larger expressions. */
#define TWIST(b,i,j) ((((b)[i]) & UPPER_MASK) | (((b)[j]) & LOWER_MASK))
/* MATRIX_A when s is odd, 0 when s is even. */
#define MAGIC(s) (((s)&1)*MATRIX_A)
/*
 * Regenerate all MT_LEN words of mt_buffer in place and rewind mt_index.
 *
 * Word i is rebuilt from the top bit of b[i], the low bits of its successor
 * (wrapping to b[0] for the last word) and the word MT_IA positions ahead
 * (again wrapping around the buffer). Words are processed in ascending
 * order, so later words see the already updated earlier ones.
 */
void mt_refill(void)
{
	unsigned int *b = mt_buffer;
	unsigned int s;
	int i;

	mt_index = 0;
	for (i = 0; i < MT_LEN; i++) {
		int next = (i + 1 == MT_LEN) ? 0 : i + 1;
		int src = (i < MT_IB) ? i + MT_IA : i - MT_IB;

		s = TWIST(b, i, next);
		b[i] = b[src] ^ (s >> 1) ^ MAGIC(s);
	}
}
0707010000001C000081A400003EA6000000320000000161272043000001AB000000000000000000000000000000000000002000000000numactl-2.0.14.20.g4ee5e0c/mt.h#define MT_LEN 624
/* Fill mt_buffer from rand() with a fixed seed and reset mt_index. */
extern void mt_init(void);
/* Regenerate the whole of mt_buffer and reset mt_index to 0. */
extern void mt_refill(void);
/* Byte offset into mt_buffer of the next word mt_random() will return. */
extern int mt_index;
/* Generator state words. */
extern unsigned int mt_buffer[MT_LEN];
/*
 * Return the next 32-bit pseudo random value.
 *
 * mt_index is kept as a byte offset into mt_buffer. Once it has reached the
 * end of the buffer the state is regenerated with mt_refill() and reading
 * restarts at offset 0.
 */
static inline unsigned int mt_random(void)
{
	unsigned char *bytes = (unsigned char *)mt_buffer;
	int offset = mt_index;

	if (offset == MT_LEN*sizeof(unsigned int)) {
		mt_refill();
		offset = 0;
	}
	mt_index += sizeof(unsigned int);
	return *(unsigned int *)(bytes + offset);
}
0707010000001D000081A400003EA60000003200000001612720430000855D000000000000000000000000000000000000002200000000numactl-2.0.14.20.g4ee5e0c/numa.3.\" Copyright 2003,2004 Andi Kleen, SuSE Labs.
.\"
.\" Permission is granted to make and distribute verbatim copies of this
.\" manual provided the copyright notice and this permission notice are
.\" preserved on all copies.
.\"
.\" Permission is granted to copy and distribute modified versions of this
.\" manual under the conditions for verbatim copying, provided that the
.\" entire resulting derived work is distributed under the terms of a
.\" permission notice identical to this one.
.\"
.\" Since the Linux kernel and libraries are constantly changing, this
.\" manual page may be incorrect or out-of-date. The author(s) assume no
.\" responsibility for errors or omissions, or for damages resulting from
.\" the use of the information contained herein.
.\"
.\" Formatted or processed versions of this manual, if unaccompanied by
.\" the source, must acknowledge the copyright and authors of this work.
.TH NUMA 3 "December 2007" "SuSE Labs" "Linux Programmer's Manual"
.SH NAME
numa \- NUMA policy library
.SH SYNOPSIS
.B #include <numa.h>
.sp
.B cc ... \-lnuma
.sp
.B int numa_available(void);
.sp
.BI "int numa_max_possible_node(void);"
.br
.BI "int numa_num_possible_nodes();"
.sp
.B int numa_max_node(void);
.br
.BI "int numa_num_configured_nodes();"
.br
.B struct bitmask *numa_get_mems_allowed(void);
.sp
.BI "int numa_num_configured_cpus(void);"
.br
.BI "struct bitmask *numa_all_nodes_ptr;"
.br
.BI "struct bitmask *numa_no_nodes_ptr;"
.br
.BI "struct bitmask *numa_all_cpus_ptr;"
.sp
.BI "int numa_num_task_cpus();"
.br
.BI "int numa_num_task_nodes();"
.sp
.BI "int numa_parse_bitmap(char *" line ", struct bitmask *" mask );
.br
.BI "struct bitmask *numa_parse_nodestring(const char *" string );
.br
.BI "struct bitmask *numa_parse_nodestring_all(const char *" string );
.br
.BI "struct bitmask *numa_parse_cpustring(const char *" string );
.br
.BI "struct bitmask *numa_parse_cpustring_all(const char *" string );
.sp
.BI "long long numa_node_size(int " node ", long long*" freep );
.br
.BI "long long numa_node_size64(int " node ", long long *" freep );
.sp
.B int numa_preferred(void);
.br
.BI "void numa_set_preferred(int " node );
.br
.BI "int numa_get_interleave_node(void);
.br
.B struct bitmask *numa_get_interleave_mask(void);
.br
.BI "void numa_set_interleave_mask(struct bitmask *" nodemask );
.br
.BI "void numa_interleave_memory(void *" start ", size_t " size ", struct bitmask *" nodemask );
.br
.BI "void numa_bind(struct bitmask *" nodemask );
.br
.BI "void numa_set_localalloc(void);
.br
.BI "void numa_set_membind(struct bitmask *" nodemask );
.br
.BI "void numa_set_membind_balancing(struct bitmask *" nodemask );
.br
.B struct bitmask *numa_get_membind(void);
.sp
.BI "void *numa_alloc_onnode(size_t " size ", int " node );
.br
.BI "void *numa_alloc_local(size_t " size );
.br
.BI "void *numa_alloc_interleaved(size_t " size );
.br
.BI "void *numa_alloc_interleaved_subset(size_t " size ", struct bitmask *" nodemask );
.br
.BI "void *numa_alloc(size_t " size );
.br
.BI "void *numa_realloc(void *"old_addr ", size_t " old_size ", size_t " new_size );
.br
.BI "void numa_free(void *" start ", size_t " size );
.sp
.BI "int numa_run_on_node(int " node );
.br
.BI "int numa_run_on_node_mask(struct bitmask *" nodemask );
.br
.BI "int numa_run_on_node_mask_all(struct bitmask *" nodemask );
.br
.B struct bitmask *numa_get_run_node_mask(void);
.sp
.BI "void numa_tonode_memory(void *" start ", size_t " size ", int " node );
.br
.BI "void numa_tonodemask_memory(void *" start ", size_t " size ", struct bitmask *" nodemask );
.br
.BI "void numa_setlocal_memory(void *" start ", size_t " size );
.br
.BI "void numa_police_memory(void *" start ", size_t " size );
.br
.BI "void numa_set_bind_policy(int " strict );
.br
.BI "void numa_set_strict(int " strict );
.sp
.\" should be undocumented ??
.BI "int numa_distance(int " node1 ", int " node2 );
.sp
.BI "int numa_sched_getaffinity(pid_t " pid ", struct bitmask *" mask );
.br
.BI "int numa_sched_setaffinity(pid_t " pid ", struct bitmask *" mask );
.br
.BI "int numa_node_to_cpus(int " node ", struct bitmask *" mask );
.br
.BI "void numa_node_to_cpu_update();"
.br
.BI "int numa_node_of_cpu(int " cpu );
.sp
.BI "struct bitmask *numa_allocate_cpumask();"
.sp
.BI "void numa_free_cpumask();"
.br
.BI "struct bitmask *numa_allocate_nodemask();"
.sp
.BI "void numa_free_nodemask();"
.br
.BI "struct bitmask *numa_bitmask_alloc(unsigned int " n );
.br
.BI "struct bitmask *numa_bitmask_clearall(struct bitmask *" bmp );
.br
.BI "struct bitmask *numa_bitmask_clearbit(struct bitmask *" bmp ", unsigned int " n );
.br
.BI "int numa_bitmask_equal(const struct bitmask *" bmp1 ", const struct bitmask *" bmp2 );
.br
.BI "void numa_bitmask_free(struct bitmask *" bmp );
.br
.BI "int numa_bitmask_isbitset(const struct bitmask *" bmp ", unsigned int " n ");"
.br
.BI "unsigned int numa_bitmask_nbytes(struct bitmask *" bmp );
.br
.BI "struct bitmask *numa_bitmask_setall(struct bitmask *" bmp );
.br
.BI "struct bitmask *numa_bitmask_setbit(struct bitmask *" bmp ", unsigned int " n );
.br
.BI "void copy_bitmask_to_nodemask(struct bitmask *" bmp ", nodemask_t *" nodemask );
.br
.BI "void copy_nodemask_to_bitmask(nodemask_t *" nodemask ", struct bitmask *" bmp );
.br
.BI "void copy_bitmask_to_bitmask(struct bitmask *" bmpfrom ", struct bitmask *" bmpto );
.br
.BI "unsigned int numa_bitmask_weight(const struct bitmask *" bmp );
.sp
.BI "int numa_move_pages(int " pid ", unsigned long " count ", void **" pages ", const int *" nodes ", int *" status ", int " flags );
.br
.BI "int numa_migrate_pages(int " pid ", struct bitmask *" fromnodes ", struct bitmask *" tonodes );
.sp
.BI "void numa_error(char *" where );
.sp
.BI "extern int " numa_exit_on_error ;
.br
.BI "extern int " numa_exit_on_warn ;
.br
.BI "void numa_warn(int " number ", char *" where ", ...);"
.br
.SH DESCRIPTION
The
.I libnuma
library offers a simple programming interface to the
NUMA (Non Uniform Memory Access)
policy supported by the
Linux kernel. On a NUMA architecture some
memory areas have different latency or bandwidth than others.
Available policies are
page interleaving (i.e., allocate in a round-robin fashion from all,
or a subset, of the nodes on the system),
preferred node allocation (i.e., preferably allocate on a particular node),
local allocation (i.e., allocate on the node on which
the task is currently executing),
or allocation only on specific nodes (i.e., allocate on
some subset of the available nodes).
It is also possible to bind tasks to specific nodes.
Numa memory allocation policy may be specified as a per-task attribute,
that is inherited by children tasks and processes, or as an attribute
of a range of process virtual address space.
Numa memory policies specified for a range of virtual address space are
shared by all tasks in the process.
Furthermore, memory policies specified for a range of a shared memory
attached using
.I shmat(2)
or
.I mmap(2)
from shmfs/hugetlbfs are shared by all processes that attach to that region.
Memory policies for shared disk backed file mappings are currently ignored.
The default memory allocation policy for tasks and all memory range
is local allocation.
This assumes that no ancestor has installed a non-default policy.
For setting a specific policy globally for all memory allocations
in a process and its children it is easiest
to start it with the
.BR numactl (8)
utility. For more finegrained policy inside an application this library
can be used.
All numa memory allocation policy only takes effect when a page is actually
faulted into the address space of a process by accessing it. The
.B numa_alloc_*
functions take care of this automatically.
A
.I node
is defined as an area where all memory has the same speed as seen from
a particular CPU.
A node can contain multiple CPUs.
Caches are ignored for this definition.
Most functions in this library are only concerned about numa nodes and
their memory.
The exceptions to this are:
.IR numa_node_to_cpus (),
.IR numa_node_to_cpu_update (),
.IR numa_node_of_cpu (),
.IR numa_bind (),
.IR numa_run_on_node (),
.IR numa_run_on_node_mask (),
.IR numa_run_on_node_mask_all (),
and
.IR numa_get_run_node_mask ().
These functions deal with the CPUs associated with numa nodes.
See the descriptions below for more information.
Some of these functions accept or return a pointer to struct bitmask.
A struct bitmask controls a bit map of arbitrary length containing a bit
representation of nodes. The predefined variable
.I numa_all_nodes_ptr
points to a bit mask that has all available nodes set;
.I numa_no_nodes_ptr
points to the empty set.
Before any other calls in this library can be used
.BR numa_available ()
must be called. If it returns \-1, all other functions in this
library are undefined.
.BR numa_max_possible_node()
returns the number of the highest possible node in a system.
In other words, the size of a kernel type nodemask_t (in bits) minus 1.
This number can be gotten by calling
.BR numa_num_possible_nodes()
and subtracting 1.
.BR numa_num_possible_nodes()
returns the size of kernel's node mask (kernel type nodemask_t).
In other words, large enough to represent the maximum number of nodes that
the kernel can handle. This will match the kernel's MAX_NUMNODES value.
This count is derived from /proc/self/status, field Mems_allowed.
.BR numa_max_node ()
returns the highest node number available on the current system.
(See the node numbers in /sys/devices/system/node/ ). Also see
.BR numa_num_configured_nodes().
.BR numa_num_configured_nodes()
returns the number of memory nodes in the system. This count
includes any nodes that are currently disabled. This count is derived from
the node numbers in /sys/devices/system/node. (Depends on the kernel being
configured with /sys (CONFIG_SYSFS)).
.BR numa_get_mems_allowed()
returns the mask of nodes from which the process is allowed to allocate
memory in its current cpuset context.
Any nodes that are not included in the returned bitmask will be ignored
in any of the following libnuma memory policy calls.
.BR numa_num_configured_cpus()
returns the number of cpus in the system. This count includes
any cpus that are currently disabled. This count is derived from the cpu
numbers in /sys/devices/system/cpu. If the kernel is configured without
/sys (CONFIG_SYSFS=n) then it falls back to using the number of online cpus.
.BR numa_all_nodes_ptr
points to a bitmask that is allocated by the library with bits
representing all nodes on which the calling task may allocate memory.
This set may be up to all nodes on the system, or up to the nodes in
the current cpuset.
The bitmask is allocated by a call to
.BR numa_allocate_nodemask()
using size
.BR numa_max_possible_node().
The set of nodes to record is derived from /proc/self/status, field
"Mems_allowed". The user should not alter this bitmask.
.BR numa_no_nodes_ptr
points to a bitmask that is allocated by the library and left all
zeroes. The bitmask is allocated by a call to
.BR numa_allocate_nodemask()
using size
.BR numa_max_possible_node().
The user should not alter this bitmask.
.BR numa_all_cpus_ptr
points to a bitmask that is allocated by the library with bits
representing all cpus on which the calling task may execute.
This set may be up to all cpus on the system, or up to the cpus in
the current cpuset.
The bitmask is allocated by a call to
.BR numa_allocate_cpumask()
using size
.BR numa_num_possible_cpus().
The set of cpus to record is derived from /proc/self/status, field
"Cpus_allowed". The user should not alter this bitmask.
.BR numa_num_task_cpus()
returns the number of cpus that the calling task is allowed
to use. This count is derived from the map /proc/self/status, field
"Cpus_allowed". Also see the bitmask
.BR numa_all_cpus_ptr.
.BR numa_num_task_nodes()
returns the number of nodes on which the calling task is
allowed to allocate memory. This count is derived from the map
/proc/self/status, field "Mems_allowed".
Also see the bitmask
.BR numa_all_nodes_ptr.
.BR numa_parse_bitmap()
parses
.IR line ,
which is a character string such as found in
/sys/devices/system/node/nodeN/cpumap into a bitmask structure.
The string contains the hexadecimal representation of a bit map.
The bitmask may be allocated with
.BR numa_allocate_cpumask().
Returns 0 on success. Returns -1 on failure.
This function is probably of little use to a user application, but
it is used by
.I libnuma
internally.
.BR numa_parse_nodestring()
parses a character string list of nodes into a bit mask.
The bit mask is allocated by
.BR numa_allocate_nodemask().
The string is a comma-separated list of node numbers or node ranges.
A leading ! can be used to indicate "not" this list (in other words, all
nodes except this list), and a leading + can be used to indicate that the
node numbers in the list are relative to the task's cpuset. The string can
be "all" to specify all (
.BR numa_num_task_nodes()
) nodes. Node numbers are limited by the number in the system. See
.BR numa_max_node()
and
.BR numa_num_configured_nodes().
.br
Examples: 1-5,7,10 !4-5 +0-3
.br
If the string is of 0 length, bitmask
.BR numa_no_nodes_ptr
is returned. Returns 0 if the string is invalid.
.BR numa_parse_nodestring_all()
is similar to
.BR numa_parse_nodestring
, but can parse all possible nodes, not only current nodeset.
.BR numa_parse_cpustring()
parses a character string list of cpus into a bit mask.
The bit mask is allocated by
.BR numa_allocate_cpumask().
The string is a comma-separated list of cpu numbers or cpu ranges.
A leading ! can be used to indicate "not" this list (in other words, all
cpus except this list), and a leading + can be used to indicate that the cpu
numbers in the list are relative to the task's cpuset. The string can be
"all" to specify all (
.BR numa_num_task_cpus()
) cpus.
Cpu numbers are limited by the number in the system. See
.BR numa_num_task_cpus()
and
.BR numa_num_configured_cpus().
.br
Examples: 1-5,7,10 !4-5 +0-3
.br
Returns 0 if the string is invalid.
.BR numa_parse_cpustring_all()
is similar to
.BR numa_parse_cpustring
, but can parse all possible cpus, not only current cpuset.
.BR numa_node_size ()
returns the memory size of a node. If the argument
.I freep
is not NULL, it is used to return the amount of free memory on the node.
On error it returns \-1.
.BR numa_node_size64 ()
works the same as
.BR numa_node_size ().
This is useful on 32-bit architectures with large nodes.
.BR numa_preferred ()
returns the preferred node of the current task.
This is the node on which the kernel preferably
allocates memory, unless some other policy overrides this.
.\" TODO: results are misleading for MPOL_PREFERRED and may
.\" be incorrect for MPOL_BIND when Mel Gorman's twozonelist
.\" patches go in. In the latter case, we'd need to know the
.\" order of the current node's zonelist to return the correct
.\" node. Need to tighten this up with the syscall results.
.BR numa_set_preferred ()
sets the preferred node for the current task to
.IR node .
The system will attempt to allocate memory from the preferred node,
but will fall back to other nodes if no memory is available on
the preferred node.
Passing a
.I node
argument of \-1 specifies local allocation and is equivalent to
calling
.BR numa_set_localalloc ().
.BR numa_get_interleave_mask ()
returns the current interleave mask if the task's memory allocation policy
is page interleaved.
Otherwise, this function returns an empty mask.
.BR numa_set_interleave_mask ()
sets the memory interleave mask for the current task to
.IR nodemask .
All new memory allocations
are page interleaved over all nodes in the interleave mask. Interleaving
can be turned off again by passing an empty mask
.RI ( numa_no_nodes ).
The page interleaving only occurs on the actual page fault that puts a new
page into the current address space. It is also only a hint: the kernel
will fall back to other nodes if no memory is available on the interleave
target.
.\" NOTE: the following is not really the case. this function sets the
.\" task policy for all future allocations, including stack, bss, ...
.\" The functions specified in this sentence actually allocate a new memory
.\" range [via mmap()]. This is quite a different thing. Suggest we drop
.\" this.
.\" This is a low level
.\" function, it may be more convenient to use the higher level functions like
.\" .BR numa_alloc_interleaved ()
.\" or
.\" .BR numa_alloc_interleaved_subset ().
.BR numa_interleave_memory ()
interleaves
.I size
bytes of memory page by page from
.I start
on nodes specified in
.IR nodemask .
The
.I size
argument will be rounded up to a multiple of the system page size.
If
.I nodemask
contains nodes that are externally denied to this process,
this call will fail.
This is a lower level function to interleave allocated but not yet faulted in
memory. Not yet faulted in means the memory is allocated using
.BR mmap (2)
or
.BR shmat (2),
but has not been accessed by the current process yet. The memory is page
interleaved to all nodes specified in
.IR nodemask .
Normally
.BR numa_alloc_interleaved ()
should be used for private memory instead, but this function is useful to
handle shared memory areas. To be useful the memory area should be
several megabytes at least (or tens of megabytes of hugetlbfs mappings).
If the
.BR numa_set_strict ()
flag is true then the operation will cause a numa_error if there were already
pages in the mapping that do not follow the policy.
.BR numa_bind ()
binds the current task and its children to the nodes
specified in
.IR nodemask .
They will only run on the CPUs of the specified nodes and only be able to allocate
memory from them.
This function is equivalent to calling
.\" FIXME checkme
.\" This is the case. --lts
.I numa_run_on_node_mask(nodemask)
followed by
.IR numa_set_membind(nodemask) .
If tasks should be bound to individual CPUs inside nodes
consider using
.I numa_node_to_cpus
and the
.I sched_setaffinity(2)
syscall.
.BR numa_set_localalloc ()
sets the memory allocation policy for the calling task to
local allocation.
In this mode, the preferred node for memory allocation is
effectively the node where the task is executing at the
time of a page allocation.
.BR numa_set_membind ()
sets the memory allocation mask.
The task will only allocate memory from the nodes set in
.IR nodemask .
Passing an empty
.I nodemask
or a
.I nodemask
that contains nodes other than those in the mask returned by
.IR numa_get_mems_allowed ()
will result in an error.
.BR numa_set_membind_balancing ()
sets the memory allocation mask and enable the Linux kernel NUMA
balancing for the task if the feature is supported by the kernel.
The task will only allocate memory from the nodes set in
.IR nodemask .
Passing an empty
.I nodemask
or a
.I nodemask
that contains nodes other than those in the mask returned by
.IR numa_get_mems_allowed ()
will result in an error.
.BR numa_get_membind ()
returns the mask of nodes from which memory can currently be allocated.
If the returned mask is equal to
.IR numa_all_nodes ,
then memory allocation is allowed from all nodes.
.BR numa_alloc_onnode ()
allocates memory on a specific node.
The
.I size
argument will be rounded up to a multiple of the system page size.
If the specified
.I node
is externally denied to this process, this call will fail.
This function is relatively slow compared to the
.IR malloc (3)
family of functions.
The memory must be freed
with
.BR numa_free ().
On errors NULL is returned.
.BR numa_alloc_local ()
allocates
.I size
bytes of memory on the local node.
The
.I size
argument will be rounded up to a multiple of the system page size.
This function is relatively slow compared to the
.IR malloc (3)
family of functions.
The memory must be freed
with
.BR numa_free ().
On errors NULL is returned.
.BR numa_alloc_interleaved ()
allocates
.I size
bytes of memory page interleaved on all nodes. This function is relatively slow
and should only be used for large areas consisting of multiple pages. The
interleaving works at page level and will only show an effect when the
area is large.
The allocated memory must be freed with
.BR numa_free ().
On error, NULL is returned.
.BR numa_alloc_interleaved_subset ()
attempts to allocate
.I size
bytes of memory page interleaved on the nodes specified in
.IR nodemask .
The
.I size
argument will be rounded up to a multiple of the system page size.
The nodes on which a process is allowed to allocate memory may
be constrained externally.
If this is the case, this function may fail.
This function is relatively slow compared to the
.IR malloc (3)
family of functions and should only be used for large areas consisting
of multiple pages.
The interleaving works at page level and will only show an effect when the
area is large.
The allocated memory must be freed with
.BR numa_free ().
On error, NULL is returned.
.BR numa_alloc ()
allocates
.I size
bytes of memory with the current NUMA policy.
The
.I size
argument will be rounded up to a multiple of the system page size.
This function is relatively slow compared to the
.IR malloc (3)
family of functions.
The memory must be freed
with
.BR numa_free ().
On errors NULL is returned.
.BR numa_realloc ()
changes the size of the memory area pointed to by
.I old_addr
from
.I old_size
to
.IR new_size .
The memory area pointed to by
.I old_addr
must have been allocated with one of the
.BR numa_alloc*
functions.
The
.I new_size
will be rounded up to a multiple of the system page size. The contents of the
memory area will be unchanged to the minimum of the old and new sizes; newly
allocated memory will be uninitialized. The memory policy (and node bindings)
associated with the original memory area will be preserved in the resized
area. For example, if the initial area was allocated with a call to
.BR numa_alloc_onnode(),
then the new pages (if the area is enlarged) will be allocated on the same node.
However, if no memory policy was set for the original area, then
.BR numa_realloc ()
cannot guarantee that the new pages will be allocated on the same node. On
success, the address of the resized area is returned (which might be different
from that of the initial area), otherwise NULL is returned and
.I errno
is set to indicate the error. The pointer returned by
.BR numa_realloc ()
is suitable for passing to
.BR numa_free ().
.BR numa_free ()
frees
.I size
bytes of memory starting at
.IR start ,
allocated by the
.B numa_alloc_*
functions above.
The
.I size
argument will be rounded up to a multiple of the system page size.
.BR numa_run_on_node ()
runs the current task and its children
on a specific node. They will not migrate to CPUs of
other nodes until the node affinity is reset with a new call to
.BR numa_run_on_node_mask ().
Passing \-1
permits the kernel to schedule on all nodes again.
On success, 0 is returned; on error \-1 is returned, and
.I errno
is set to indicate the error.
.BR numa_run_on_node_mask ()
runs the current task and its children only on nodes specified in
.IR nodemask .
They will not migrate to CPUs of
other nodes until the node affinity is reset with a new call to
.BR numa_run_on_node_mask ()
or
.BR numa_run_on_node ().
Passing
.I numa_all_nodes
permits the kernel to schedule on all nodes again.
On success, 0 is returned; on error \-1 is returned, and
.I errno
is set to indicate the error.
.BR numa_run_on_node_mask_all ()
runs the current task and its children only on nodes specified in
.IR nodemask
like
.I numa_run_on_node_mask
but without any cpuset awareness.
.BR numa_get_run_node_mask ()
returns the mask of nodes on which the current task is allowed to run.
.BR numa_tonode_memory ()
puts memory on a specific node. The constraints described for
.BR numa_interleave_memory ()
apply here too.
.BR numa_tonodemask_memory ()
puts memory on a specific set of nodes. The constraints described for
.BR numa_interleave_memory ()
apply here too.
.BR numa_setlocal_memory ()
locates memory on the current node. The constraints described for
.BR numa_interleave_memory ()
apply here too.
.BR numa_police_memory ()
locates memory with the current NUMA policy. The constraints described for
.BR numa_interleave_memory ()
apply here too.
.BR numa_distance ()
reports the distance in the machine topology between two nodes.
The factors are a multiple of 10. It returns 0 when the distance
cannot be determined. A node has distance 10 to itself.
Reporting the distance requires a Linux
kernel version of
.I 2.6.10
or newer.
.BR numa_set_bind_policy ()
specifies whether calls that bind memory to a specific node should
use the preferred policy or a strict policy.
The preferred policy allows the kernel
to allocate memory on other nodes when there isn't enough free
on the target node. strict will fail the allocation in that case.
Setting the argument to 1 specifies strict, 0 preferred.
Note that specifying more than one node non strict may only use
the first node in some kernel versions.
.BR numa_set_strict ()
sets a flag that says whether the functions allocating on specific
nodes should use a strict policy. Strict means the allocation
will fail if the memory cannot be allocated on the target node.
Default operation is to fall back to other nodes.
This doesn't apply to interleave and default.
.BR numa_get_interleave_node()
is used by
.I libnuma
internally. It is probably not useful for user applications.
It uses the MPOL_F_NODE flag of the get_mempolicy system call, which is
not intended for application use (its operation may change or be removed
altogether in future kernel versions). See get_mempolicy(2).
.BR numa_pagesize()
returns the number of bytes in a page. This function is simply a fast
alternative to repeated calls to the getpagesize system call.
See getpagesize(2).
.BR numa_sched_getaffinity()
retrieves a bitmask of the cpus on which a task may run. The task is
specified by
.I pid.
Returns the return value of the sched_getaffinity
system call. See sched_getaffinity(2).
The bitmask must be at least the size of the kernel's cpu mask structure. Use
.BR numa_allocate_cpumask()
to allocate it.
Test the bits in the mask by calling
.BR numa_bitmask_isbitset().
.BR numa_sched_setaffinity()
sets a task's allowed cpus to those cpus specified in
.IR mask .
The task is specified by
.IR pid .
Returns the return value of the sched_setaffinity system call.
See sched_setaffinity(2). You may allocate the bitmask with
.BR numa_allocate_cpumask().
Or the bitmask may be smaller than the kernel's cpu mask structure. For
example, call
.BR numa_bitmask_alloc()
using a maximum number of cpus from
.BR numa_num_configured_cpus().
Set the bits in the mask by calling
.BR numa_bitmask_setbit().
.BR numa_node_to_cpus ()
converts a node number to a bitmask of CPUs. The user must pass a bitmask
structure with a mask buffer long enough to represent all possible cpus.
Use
.BR numa_allocate_cpumask ()
to create it. If the bitmask is not long enough
.I errno
will be set to
.I ERANGE
and \-1 returned. On success 0 is returned.
.BR numa_node_to_cpu_update ()
marks the cpu bitmasks of all nodes stale; then get the latest bitmask by calling
.BR numa_node_to_cpus ().
This allows the libnuma state to be updated after a CPU hotplug event. The application
is in charge of detecting CPU hotplug events.
.BR numa_node_of_cpu ()
returns the node that a cpu belongs to. If the user supplies an invalid cpu
.I errno
will be set to
.I EINVAL
and \-1 will be returned.
.BR numa_allocate_cpumask ()
returns a bitmask of a size equal to the kernel's cpu
mask (kernel type cpumask_t). In other words, large enough to represent
NR_CPUS cpus. This number of cpus can be gotten by calling
.BR numa_num_possible_cpus().
The bitmask is zero-filled.
.BR numa_free_cpumask ()
frees a cpumask previously allocated by
.IR numa_allocate_cpumask ().
.BR numa_allocate_nodemask()
returns a bitmask of a size equal to the kernel's node
mask (kernel type nodemask_t). In other words, large enough to represent
MAX_NUMNODES nodes. This number of nodes can be gotten by calling
.BR numa_num_possible_nodes().
The bitmask is zero-filled.
.BR numa_free_nodemask()
frees a nodemask previously allocated by
.I numa_allocate_nodemask().
.BR numa_bitmask_alloc()
allocates a bitmask structure and its associated bit mask.
The memory allocated for the bit mask contains enough words (type unsigned
long) to contain
.I n
bits. The bit mask is zero-filled. The bitmask
structure points to the bit mask and contains the
.I n
value.
.BR numa_bitmask_clearall()
sets all bits in the bit mask to 0. The bitmask structure
points to the bit mask and contains its size (
.I bmp
->size). The value of
.I bmp
is always returned. Note that
.BR numa_bitmask_alloc()
creates a zero-filled bit mask.
.BR numa_bitmask_clearbit()
sets a specified bit in a bit mask to 0. Nothing is done if
the
.I n
value is greater than the size of the bitmask (and no error is
returned). The value of
.I bmp
is always returned.
.BR numa_bitmask_equal()
returns 1 if two bitmasks are equal. It returns 0 if they
are not equal. If the bitmask structures control bit masks of different
sizes, the "missing" trailing bits of the smaller bit mask are considered
to be 0.
.BR numa_bitmask_free()
deallocates the memory of both the bitmask structure pointed
to by
.I bmp
and the bit mask. It is an error to attempt to free this bitmask twice.
.BR numa_bitmask_isbitset()
returns the value of a specified bit in a bit mask.
If the
.I n
value is greater than the size of the bit map, 0 is returned.
.BR numa_bitmask_nbytes()
returns the size (in bytes) of the bit mask controlled by
.I bmp.
The bit masks are always full words (type unsigned long), and the returned
size is the actual size of all those words.
.BR numa_bitmask_setall()
sets all bits in the bit mask to 1. The bitmask structure
points to the bit mask and contains its size (
.I bmp
->size).
The value of
.I bmp
is always returned.
.BR numa_bitmask_setbit()
sets a specified bit in a bit mask to 1. Nothing is done if
.I n
is greater than the size of the bitmask (and no error is
returned). The value of
.I bmp
is always returned.
.BR copy_bitmask_to_nodemask()
copies the body (the bit map itself) of the bitmask structure pointed
to by
.I bmp
to the nodemask_t structure pointed to by the
.I nodemask
pointer. If the two areas differ in size, the copy is truncated to the size
of the receiving field or zero-filled.
.BR copy_nodemask_to_bitmask()
copies the nodemask_t structure pointed to by the
.I nodemask
pointer to the body (the bit map itself) of the bitmask structure pointed
to by the
.I bmp
pointer. If the two areas differ in size, the copy is truncated to the size
of the receiving field or zero-filled.
.BR copy_bitmask_to_bitmask()
copies the body (the bit map itself) of the bitmask structure pointed
to by the
.I bmpfrom
pointer to the body of the bitmask structure pointed to by the
.I bmpto
pointer. If the two areas differ in size, the copy is truncated to the size
of the receiving field or zero-filled.
.BR numa_bitmask_weight()
returns a count of the bits that are set in the body of the bitmask pointed
to by the
.I bmp
argument.
.br
.BR numa_move_pages()
moves a list of pages in the address space of the specified task
or of the current task.
It simply uses the move_pages system call.
.br
.I pid
- ID of task. If not valid, use the current task.
.br
.I count
- Number of pages.
.br
.I pages
- List of pages to move.
.br
.I nodes
- List of nodes to which pages can be moved.
.br
.I status
- Field to which status is to be returned.
.br
.I flags
- MPOL_MF_MOVE or MPOL_MF_MOVE_ALL
.br
See move_pages(2).
.BR numa_migrate_pages()
simply uses the migrate_pages system call to cause the pages of the calling
task, or a specified task, to be migrated from one set of nodes to another.
See migrate_pages(2).
The bit masks representing the nodes should be allocated with
.BR numa_allocate_nodemask()
, or with
.BR numa_bitmask_alloc()
using an
.I n
value returned from
.BR numa_num_possible_nodes().
A task's current node set can be gotten by calling
.BR numa_get_membind().
Bits in the
.I tonodes
mask can be set by calls to
.BR numa_bitmask_setbit().
.BR numa_error ()
is a
.I libnuma
internal function that can be overridden by the
user program.
This function is called with a
.I char *
argument when a
.I libnuma
function fails.
Overriding the library internal definition
makes it possible to specify a different error handling strategy
when a
.I libnuma
function fails. It does not affect
.BR numa_available ().
The
.BR numa_error ()
function defined in
.I libnuma
prints an error on
.I stderr
and terminates
the program if
.I numa_exit_on_error
is set to a non-zero value.
The default value of
.I numa_exit_on_error
is zero.
.BR numa_warn ()
is a
.I libnuma
internal function that can also be overridden
by the user program.
It is called to warn the user when a
.I libnuma
function encounters a non-fatal error.
The default implementation
prints a warning to
.IR stderr .
The first argument is a unique
number identifying each warning. After that there is a
.BR printf (3)-style
format string and a variable number of arguments.
.I numa_warn
exits the program when
.I numa_exit_on_warn
is set to a non-zero value.
The default value of
.I numa_exit_on_warn
is zero.
.SH Compatibility with libnuma version 1
Binaries that were compiled for libnuma version 1 need not be re-compiled
to run with libnuma version 2.
.br
Source codes written for libnuma version 1 may be re-compiled without
change with version 2 installed. To do so, in the code's Makefile add
this option to CFLAGS: -DNUMA_VERSION1_COMPATIBILITY
.SH THREAD SAFETY
.I numa_set_bind_policy
and
.I numa_exit_on_error
are process global. The other calls are thread safe.
.SH COPYRIGHT
Copyright 2002, 2004, 2007, 2008 Andi Kleen, SuSE Labs.
.I libnuma
is under the GNU Lesser General Public License, v2.1.
.SH SEE ALSO
.BR get_mempolicy (2),
.BR set_mempolicy (2),
.BR getpagesize (2),
.BR mbind (2),
.BR mmap (2),
.BR shmat (2),
.BR numactl (8),
.BR sched_getaffinity (2),
.BR sched_setaffinity (2),
.BR move_pages (2),
.BR migrate_pages (2)
0707010000001E000081A400003EA6000000320000000161272043000035A7000000000000000000000000000000000000002200000000numactl-2.0.14.20.g4ee5e0c/numa.h/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
libnuma is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; version
2.1.
libnuma is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should find a copy of v2.1 of the GNU Lesser General Public License
somewhere on your Linux system; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#ifndef _NUMA_H
#define _NUMA_H 1
/* allow an application to test for the current programming interface: */
#define LIBNUMA_API_VERSION 2
/* Simple NUMA policy library */
#include <stddef.h>
#include <string.h>
#include <sys/types.h>
#include <stdlib.h>
#if defined(__x86_64__) || defined(__i386__)
#define NUMA_NUM_NODES 128
#else
#define NUMA_NUM_NODES 2048
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Fixed-size node set used by the version 1 compatibility helpers below:
   an array of unsigned long words sized to hold NUMA_NUM_NODES bits,
   one bit per node number. */
typedef struct {
unsigned long n[NUMA_NUM_NODES/(sizeof(unsigned long)*8)];
} nodemask_t;
/* Variable-size bit mask: a bit count plus a pointer to the words that
   hold the bits. Manipulated through the numa_bitmask_*() operations. */
struct bitmask {
unsigned long size; /* number of bits in the map */
unsigned long *maskp; /* the bit map itself, stored as unsigned long words */
};
/* operations on struct bitmask */
/* Return the value of the given bit; 0 if the bit number is beyond the
   size of the map. */
int numa_bitmask_isbitset(const struct bitmask *, unsigned int);
/* Set every bit in the map to 1. Returns the bitmask passed in. */
struct bitmask *numa_bitmask_setall(struct bitmask *);
/* Set every bit in the map to 0. Returns the bitmask passed in. */
struct bitmask *numa_bitmask_clearall(struct bitmask *);
/* Set the given bit to 1. A bit number beyond the size of the map is
   ignored without error. Returns the bitmask passed in. */
struct bitmask *numa_bitmask_setbit(struct bitmask *, unsigned int);
/* Set the given bit to 0. A bit number beyond the size of the map is
   ignored without error. Returns the bitmask passed in. */
struct bitmask *numa_bitmask_clearbit(struct bitmask *, unsigned int);
/* Size in bytes of the bit map, counted in whole unsigned long words. */
unsigned int numa_bitmask_nbytes(struct bitmask *);
/* Number of bits that are set in the map. */
unsigned int numa_bitmask_weight(const struct bitmask *);
/* Allocate a bitmask structure and a zero-filled bit map large enough
   for the requested number of bits. */
struct bitmask *numa_bitmask_alloc(unsigned int);
/* Free both the bitmask structure and its bit map. Freeing the same
   bitmask twice is an error. */
void numa_bitmask_free(struct bitmask *);
/* Return 1 if the two masks are equal, 0 otherwise. When the sizes differ,
   the missing trailing bits of the smaller mask are treated as 0. */
int numa_bitmask_equal(const struct bitmask *, const struct bitmask *);
/* The three copy helpers below copy only the bit map body. If source and
   destination differ in size, the copy is truncated to the size of the
   receiving field or zero-filled. */
void copy_nodemask_to_bitmask(nodemask_t *, struct bitmask *);
void copy_bitmask_to_nodemask(struct bitmask *, nodemask_t *);
void copy_bitmask_to_bitmask(struct bitmask *, struct bitmask *);
/* compatibility for codes that used them: */
/* Clear every bit of a nodemask_t. */
static inline void nodemask_zero(nodemask_t *mask)
{
	struct bitmask view;

	/* Describe the caller's storage in place; nothing is copied. */
	view.size = 8 * sizeof(nodemask_t);
	view.maskp = (unsigned long *)mask;

	numa_bitmask_clearall(&view);
}
/* Version 1 compatibility name: clear every bit of a nodemask_t. */
static inline void nodemask_zero_compat(nodemask_t *mask)
{
	struct bitmask wrapper;

	/* Point a temporary struct bitmask at the nodemask_t words so the
	   generic clear routine can operate on them directly. */
	wrapper.size = 8 * sizeof(nodemask_t);
	wrapper.maskp = (unsigned long *)mask;

	numa_bitmask_clearall(&wrapper);
}
/*
 * Set the bit for `node` in a nodemask_t.
 *
 * A node number that does not fit in the mask (negative, or not smaller
 * than the number of bits held by mask->n) is ignored instead of being
 * written outside the array. This mirrors the range check already done by
 * nodemask_isset_compat().
 */
static inline void nodemask_set_compat(nodemask_t *mask, int node)
{
	const size_t word_bits = 8 * sizeof(unsigned long);

	/* The unsigned cast folds the negative case into the upper-bound test. */
	if ((unsigned)node >= 8 * sizeof(mask->n))
		return;

	mask->n[node / word_bits] |= 1UL << (node % word_bits);
}
/*
 * Clear the bit for `node` in a nodemask_t.
 *
 * A node number that does not fit in the mask (negative, or not smaller
 * than the number of bits held by mask->n) is ignored instead of being
 * written outside the array. This mirrors the range check already done by
 * nodemask_isset_compat().
 */
static inline void nodemask_clr_compat(nodemask_t *mask, int node)
{
	const size_t word_bits = 8 * sizeof(unsigned long);

	/* The unsigned cast folds the negative case into the upper-bound test. */
	if ((unsigned)node >= 8 * sizeof(mask->n))
		return;

	mask->n[node / word_bits] &= ~(1UL << (node % word_bits));
}
/*
 * Test the bit for `node` in a nodemask_t.
 * Returns 1 if the bit is set, 0 if it is clear or if `node` is outside
 * the range 0 .. NUMA_NUM_NODES-1.
 */
static inline int nodemask_isset_compat(const nodemask_t *mask, int node)
{
	const size_t word_bits = 8 * sizeof(unsigned long);
	unsigned long word;

	/* The unsigned cast makes negative node numbers fail this test too. */
	if ((unsigned)node >= NUMA_NUM_NODES)
		return 0;

	word = mask->n[node / word_bits];
	return (int)((word >> (node % word_bits)) & 1UL);
}
static inline int nodemask_equal(const nodemask_t *a, const nodemask_t *b)
{
struct bitmask tmp_a, tmp_b;
tmp_a.maskp = (unsigned long *)a;
tmp_a.size = sizeof(nodemask_t) * 8;
tmp_b.maskp = (unsigned long *)b;
tmp_b.size = sizeof(nodemask_t) * 8;
return numa_bitmask_equal(&tmp_a, &tmp_b);
}
static inline int nodemask_equal_compat(const nodemask_t *a, const nodemask_t *b)
{
struct bitmask tmp_a, tmp_b;
tmp_a.maskp = (unsigned long *)a;
tmp_a.size = sizeof(nodemask_t) * 8;
tmp_b.maskp = (unsigned long *)b;
tmp_b.size = sizeof(nodemask_t) * 8;
return numa_bitmask_equal(&tmp_a, &tmp_b);
}
/* NUMA support available. If this returns a negative value all other functions
in this library are undefined. */
int numa_available(void);
/* Basic NUMA state */
/* Get max available node */
int numa_max_node(void);
int numa_max_possible_node(void);
/* Return preferred node */
int numa_preferred(void);
/* Return node size and free memory */
long long numa_node_size64(int node, long long *freep);
long long numa_node_size(int node, long long *freep);
int numa_pagesize(void);
/* Set with all nodes from which the calling process may allocate memory.
Only valid after numa_available. */
extern struct bitmask *numa_all_nodes_ptr;
/* Set with all nodes the kernel has exposed to userspace */
extern struct bitmask *numa_nodes_ptr;
/* For source compatibility */
extern nodemask_t numa_all_nodes;
/* Set with all cpus. */
extern struct bitmask *numa_all_cpus_ptr;
/* Set with no nodes */
extern struct bitmask *numa_no_nodes_ptr;
/* Source compatibility */
extern nodemask_t numa_no_nodes;
/* Only run and allocate memory from a specific set of nodes. */
void numa_bind(struct bitmask *nodes);
/* Set the NUMA node interleaving mask. 0 to turn off interleaving */
void numa_set_interleave_mask(struct bitmask *nodemask);
/* Return the current interleaving mask */
struct bitmask *numa_get_interleave_mask(void);
/* allocate a bitmask big enough for all nodes */
struct bitmask *numa_allocate_nodemask(void);
/* Free a bitmask obtained from numa_allocate_nodemask(); simply forwards
   to numa_bitmask_free(). */
static inline void numa_free_nodemask(struct bitmask *b)
{
numa_bitmask_free(b);
}
/* Some node to preferably allocate memory from for task. */
void numa_set_preferred(int node);
/* Set local memory allocation policy for task */
void numa_set_localalloc(void);
/* Only allocate memory from the nodes set in mask. 0 to turn off */
void numa_set_membind(struct bitmask *nodemask);
/* Only allocate memory from the nodes set in mask. Optimize page
placement with Linux kernel NUMA balancing if possible. 0 to turn off */
void numa_set_membind_balancing(struct bitmask *bmp);
/* Return current membind */
struct bitmask *numa_get_membind(void);
/* Return allowed memories [nodes] */
struct bitmask *numa_get_mems_allowed(void);
int numa_get_interleave_node(void);
/* NUMA memory allocation. These functions always round to page size
and are relatively slow. */
/* Alloc memory page interleaved on nodes in mask */
void *numa_alloc_interleaved_subset(size_t size, struct bitmask *nodemask);
/* Alloc memory page interleaved on all nodes. */
void *numa_alloc_interleaved(size_t size);
/* Alloc memory located on node */
void *numa_alloc_onnode(size_t size, int node);
/* Alloc memory on local node */
void *numa_alloc_local(size_t size);
/* Allocation with current policy */
void *numa_alloc(size_t size);
/* Change the size of a memory area preserving the memory policy */
void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
/* Free memory allocated by the functions above */
void numa_free(void *mem, size_t size);
/* Low level functions, primarily for shared memory. All memory
processed by these must not be touched yet */
/* Interleave a memory area. */
void numa_interleave_memory(void *mem, size_t size, struct bitmask *mask);
/* Allocate a memory area on a specific node. */
void numa_tonode_memory(void *start, size_t size, int node);
/* Allocate memory on a mask of nodes. */
void numa_tonodemask_memory(void *mem, size_t size, struct bitmask *mask);
/* Allocate a memory area on the current node. */
void numa_setlocal_memory(void *start, size_t size);
/* Allocate memory area with current memory policy */
void numa_police_memory(void *start, size_t size);
/* Run current task only on nodes in mask */
int numa_run_on_node_mask(struct bitmask *mask);
/* Run current task on nodes in mask without any cpuset awareness */
int numa_run_on_node_mask_all(struct bitmask *mask);
/* Run current task only on node */
int numa_run_on_node(int node);
/* Return current mask of nodes the task can run on */
struct bitmask * numa_get_run_node_mask(void);
/* When strict fail allocation when memory cannot be allocated in target node(s). */
void numa_set_bind_policy(int strict);
/* Fail when existing memory has incompatible policy */
void numa_set_strict(int flag);
/* maximum nodes (size of kernel nodemask_t) */
int numa_num_possible_nodes(void);
/* maximum cpus (size of kernel cpumask_t) */
int numa_num_possible_cpus(void);
/* nodes in the system */
int numa_num_configured_nodes(void);
/* cpus in the system, including any that are currently disabled */
int numa_num_configured_cpus(void);
/* maximum cpus allowed to current task */
int numa_num_task_cpus(void);
int numa_num_thread_cpus(void); /* backward compatibility */
/* maximum nodes allowed to current task */
int numa_num_task_nodes(void);
int numa_num_thread_nodes(void); /* backward compatibility */
/* allocate a bitmask the size of the kernel cpumask_t */
struct bitmask *numa_allocate_cpumask(void);
/* Free a bitmask obtained from numa_allocate_cpumask(); simply forwards
   to numa_bitmask_free(). */
static inline void numa_free_cpumask(struct bitmask *b)
{
numa_bitmask_free(b);
}
/* Convert node to CPU mask. -1/errno on failure, otherwise 0. */
int numa_node_to_cpus(int, struct bitmask *);
void numa_node_to_cpu_update(void);
/* report the node of the specified cpu. -1/errno on invalid cpu. */
int numa_node_of_cpu(int cpu);
/* Report distance of node1 from node2. 0 on error.*/
int numa_distance(int node1, int node2);
/* Error handling. */
/* This is an internal function in libnuma that can be overridden by a user
program. Default is to print an error to stderr and exit if numa_exit_on_error
is true. */
void numa_error(char *where);
/* When true exit the program when a NUMA system call (except numa_available)
fails */
extern int numa_exit_on_error;
/* Warning function. Can also be overridden. Default is to print on stderr
once. */
void numa_warn(int num, char *fmt, ...);
/* When true exit the program on a numa_warn() call */
extern int numa_exit_on_warn;
int numa_migrate_pages(int pid, struct bitmask *from, struct bitmask *to);
int numa_move_pages(int pid, unsigned long count, void **pages,
const int *nodes, int *status, int flags);
int numa_sched_getaffinity(pid_t, struct bitmask *);
int numa_sched_setaffinity(pid_t, struct bitmask *);
/* Convert an ascii list of nodes to a bitmask */
struct bitmask *numa_parse_nodestring(const char *);
/* Convert an ascii list of nodes to a bitmask without current nodeset
* dependency */
struct bitmask *numa_parse_nodestring_all(const char *);
/* Convert an ascii list of cpu to a bitmask */
struct bitmask *numa_parse_cpustring(const char *);
/* Convert an ascii list of cpu to a bitmask without current taskset
* dependency */
struct bitmask *numa_parse_cpustring_all(const char *);
/*
* The following functions are for source code compatibility
* with releases prior to version 2.
* Such codes should be compiled with NUMA_VERSION1_COMPATIBILITY defined.
*/
/*
 * Version 1 wrapper for numa_set_interleave_mask().
 * Describes the caller's nodemask_t storage with a stack bitmask header
 * (nothing is copied) and passes that on.
 */
static inline void numa_set_interleave_mask_compat(nodemask_t *nodemask)
{
	struct bitmask view;

	/* width of a nodemask_t in bits */
	view.size = sizeof(nodemask_t) * 8;
	view.maskp = (unsigned long *)nodemask;
	numa_set_interleave_mask(&view);
}
/*
 * Version 1 wrapper for numa_get_interleave_mask().
 * Copies the returned bitmask into a nodemask_t, frees the bitmask and
 * returns the nodemask_t by value.
 */
static inline nodemask_t numa_get_interleave_mask_compat(void)
{
	nodemask_t result;
	struct bitmask *bmp = numa_get_interleave_mask();

	copy_bitmask_to_nodemask(bmp, &result);
	numa_bitmask_free(bmp);
	return result;
}
/*
 * Version 1 wrapper for numa_bind().
 * Copies the nodemask_t into a freshly allocated node bitmask, binds to
 * it and frees the temporary bitmask again.
 */
static inline void numa_bind_compat(nodemask_t *mask)
{
	struct bitmask *bmp = numa_allocate_nodemask();

	copy_nodemask_to_bitmask(mask, bmp);
	numa_bind(bmp);
	numa_bitmask_free(bmp);
}
/*
 * Version 1 wrapper for numa_set_membind().
 * Describes the caller's nodemask_t storage with a stack bitmask header
 * (nothing is copied) and passes that on.
 */
static inline void numa_set_membind_compat(nodemask_t *mask)
{
	struct bitmask view;

	/* width of a nodemask_t in bits */
	view.size = sizeof(nodemask_t) * 8;
	view.maskp = (unsigned long *)mask;
	numa_set_membind(&view);
}
/*
 * Version 1 wrapper for numa_get_membind().
 * Copies the returned bitmask into a nodemask_t, frees the bitmask and
 * returns the nodemask_t by value.
 */
static inline nodemask_t numa_get_membind_compat(void)
{
	nodemask_t result;
	struct bitmask *bmp = numa_get_membind();

	copy_bitmask_to_nodemask(bmp, &result);
	numa_bitmask_free(bmp);
	return result;
}
static inline void *numa_alloc_interleaved_subset_compat(size_t size,
const nodemask_t *mask)
{
struct bitmask tmp;
tmp.maskp = (unsigned long *)mask;
tmp.size = sizeof(nodemask_t) * 8;
return numa_alloc_interleaved_subset(size, &tmp);
}
static inline int numa_run_on_node_mask_compat(const nodemask_t *mask)
{
struct bitmask tmp;
tmp.maskp = (unsigned long *)mask;
tmp.size = sizeof(nodemask_t) * 8;
return numa_run_on_node_mask(&tmp);
}
/*
 * Version 1 wrapper for numa_get_run_node_mask().
 * Copies the returned bitmask into a nodemask_t, frees the bitmask and
 * returns the nodemask_t by value.
 */
static inline nodemask_t numa_get_run_node_mask_compat(void)
{
	nodemask_t result;
	struct bitmask *bmp = numa_get_run_node_mask();

	copy_bitmask_to_nodemask(bmp, &result);
	numa_bitmask_free(bmp);
	return result;
}
static inline void numa_interleave_memory_compat(void *mem, size_t size,
const nodemask_t *mask)
{
struct bitmask tmp;
tmp.maskp = (unsigned long *)mask;
tmp.size = sizeof(nodemask_t) * 8;
numa_interleave_memory(mem, size, &tmp);
}
static inline void numa_tonodemask_memory_compat(void *mem, size_t size,
const nodemask_t *mask)
{
struct bitmask tmp;
tmp.maskp = (unsigned long *)mask;
tmp.size = sizeof(nodemask_t) * 8;
numa_tonodemask_memory(mem, size, &tmp);
}
static inline int numa_sched_getaffinity_compat(pid_t pid, unsigned len,
unsigned long *mask)
{
struct bitmask tmp;
tmp.maskp = (unsigned long *)mask;
tmp.size = len * 8;
return numa_sched_getaffinity(pid, &tmp);
}
static inline int numa_sched_setaffinity_compat(pid_t pid, unsigned len,
unsigned long *mask)
{
struct bitmask tmp;
tmp.maskp = (unsigned long *)mask;
tmp.size = len * 8;
return numa_sched_setaffinity(pid, &tmp);
}
static inline int numa_node_to_cpus_compat(int node, unsigned long *buffer,
int buffer_len)
{
struct bitmask tmp;
tmp.maskp = (unsigned long *)buffer;
tmp.size = buffer_len * 8;
return numa_node_to_cpus(node, &tmp);
}
/* end of version 1 compatibility functions */
/*
* To compile an application that uses libnuma version 1:
* add -DNUMA_VERSION1_COMPATIBILITY to your Makefile's CFLAGS
*/
#ifdef NUMA_VERSION1_COMPATIBILITY
#include <numacompat1.h>
#endif
#ifdef __cplusplus
}
#endif
#endif
0707010000001F000081A400003EA6000000320000000161272043000000D8000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/numa.pc.inprefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@
Name: numa
Description: NUMA policy library
Version: @VERSION@
Cflags: -I${includedir}
Libs: -L${libdir} -lnuma
Libs.Private: @LIBS@
07070100000020000081A400003EA6000000320000000161272043000004CF000000000000000000000000000000000000002900000000numactl-2.0.14.20.g4ee5e0c/numacompat1.h#define numa_set_interleave_mask(m) numa_set_interleave_mask_compat(m)
#define numa_get_interleave_mask() numa_get_interleave_mask_compat()
#define numa_bind(m) numa_bind_compat(m)
/* numa_get_membind_compat() takes no argument, so the macro takes none either. */
#define numa_get_membind() numa_get_membind_compat()
#define numa_set_membind(m) numa_set_membind_compat(m)
#define numa_alloc_interleaved_subset(s,m) numa_alloc_interleaved_subset_compat(s,m)
#define numa_run_on_node_mask(m) numa_run_on_node_mask_compat(m)
#define numa_get_run_node_mask() numa_get_run_node_mask_compat()
#define numa_interleave_memory(st,si,m) numa_interleave_memory_compat(st,si,m)
#define numa_tonodemask_memory(st,si,m) numa_tonodemask_memory_compat(st,si,m)
#define numa_sched_getaffinity(p,l,m) numa_sched_getaffinity_compat(p,l,m)
#define numa_sched_setaffinity(p,l,m) numa_sched_setaffinity_compat(p,l,m)
#define numa_node_to_cpus(n,b,bl) numa_node_to_cpus_compat(n,b,bl)
#define nodemask_zero(m) nodemask_zero_compat(m)
#define nodemask_set(m, n) nodemask_set_compat(m, n)
#define nodemask_clr(m, n) nodemask_clr_compat(m, n)
#define nodemask_isset(m, n) nodemask_isset_compat(m, n)
#define nodemask_equal(a, b) nodemask_equal_compat(a, b)
07070100000021000081A400003EA600000032000000016127204300002638000000000000000000000000000000000000002500000000numactl-2.0.14.20.g4ee5e0c/numactl.8.\" t
.\" Copyright 2003,2004 Andi Kleen, SuSE Labs.
.\"
.\" Permission is granted to make and distribute verbatim copies of this
.\" manual provided the copyright notice and this permission notice are
.\" preserved on all copies.
.\"
.\" Permission is granted to copy and distribute modified versions of this
.\" manual under the conditions for verbatim copying, provided that the
.\" entire resulting derived work is distributed under the terms of a
.\" permission notice identical to this one.
.\"
.\" Since the Linux kernel and libraries are constantly changing, this
.\" manual page may be incorrect or out-of-date. The author(s) assume no
.\" responsibility for errors or omissions, or for damages resulting from
.\" the use of the information contained herein.
.\"
.\" Formatted or processed versions of this manual, if unaccompanied by
.\" the source, must acknowledge the copyright and authors of this work.
.TH NUMACTL 8 "Mar 2004" "SuSE Labs" "Linux Administrator's Manual"
.SH NAME
numactl \- Control NUMA policy for processes or shared memory
.SH SYNOPSIS
.B numactl
[
.B \-\-all
] [
.B \-\-balancing
] [
.B \-\-interleave nodes
] [
.B \-\-preferred node
] [
.B \-\-membind nodes
] [
.B \-\-cpunodebind nodes
] [
.B \-\-physcpubind cpus
] [
.B \-\-localalloc
] [\-\-] command {arguments ...}
.br
.B numactl \-\-show
.br
.B numactl \-\-hardware
.br
.B numactl
[
.B \-\-huge
] [
.B \-\-offset offset
] [
.B \-\-shmmode shmmode
] [
.B \-\-length length
] [
.B \-\-strict
]
.br
[
.B \-\-shmid id
]
.B \-\-shm shmkeyfile
|
.B \-\-file tmpfsfile
.br
[
.B \-\-touch
] [
.B \-\-dump
] [
.B \-\-dump-nodes
]
memory policy
.SH DESCRIPTION
.B numactl
runs processes with a specific NUMA scheduling or memory placement policy.
The policy is set for command and inherited by all of its children.
In addition it can set persistent policy for shared memory segments or files.
.PP
Use -- before command if using command options that could be confused
with numactl options.
.PP
.I nodes
may be specified as N,N,N or N-N or N,N-N or N-N,N-N and so forth.
Relative
.I nodes
may be specified as +N,N,N or +N-N or +N,N-N and so forth. The + indicates that
the node numbers are relative to the process' set of allowed nodes in its
current cpuset.
A !N-N notation indicates the inverse of N-N, in other words all nodes
except N-N. If used with + notation, specify !+N-N. When
.I same
is specified the previous nodemask specified on the command line is used.
all means all nodes in the current cpuset.
.PP
Instead of a number a node can also be:
.TS
tab(|);
l l.
netdev:DEV|The node connected to network device DEV.
file:PATH |The node of the block device of PATH.
ip:HOST |The node of the network device of HOST
block:PATH|The node of block device PATH
pci:[seg:]bus:dev[:func]|The node of a PCI device.
.TE
Note that block resolves kernel block device names only;
for udev names in /dev use
.I file:
.TP
Policy settings are:
.TP
.B \-\-all, \-a
Unset default cpuset awareness, so user can use all possible CPUs/nodes
for following policy settings.
.TP
.B \-\-interleave=nodes, \-i nodes
Set a memory interleave policy. Memory will be allocated using round robin
on
.I nodes.
When memory cannot be allocated on the current interleave target fall back
to other nodes.
Multiple nodes may be specified on --interleave, --membind and --cpunodebind.
.TP
.B \-\-membind=nodes, \-m nodes
Only allocate memory from nodes. Allocation will fail when there
is not enough memory available on these nodes.
.I nodes
may be specified as noted above.
.TP
.B \-\-cpunodebind=nodes, \-N nodes
Only execute
.I command
on the CPUs of
.I nodes.
Note that nodes may consist of multiple CPUs.
.I nodes
may be specified as noted above.
.TP
.B \-\-physcpubind=cpus, \-C cpus
Only execute
.I process
on
.I cpus.
This accepts cpu numbers as shown in the
.I processor
fields of
.I /proc/cpuinfo,
or relative cpus as in relative to the current cpuset.
You may specify "all", which means all cpus in the current cpuset.
Physical
.I cpus
may be specified as N,N,N or N-N or N,N-N or N-N,N-N and so forth.
Relative
.I cpus
may be specified as +N,N,N or +N-N or +N,N-N and so forth. The + indicates that
the cpu numbers are relative to the process' set of allowed cpus in its
current cpuset.
A !N-N notation indicates the inverse of N-N, in other words all cpus
except N-N. If used with + notation, specify !+N-N.
.TP
.B \-\-localalloc, \-l
Try to allocate on the current node of the process, but if memory cannot be allocated there fall back to other nodes.
.TP
.B \-\-preferred=node
Preferably allocate memory on
.I node,
but if memory cannot be allocated there fall back to other nodes.
This option takes only a single node number.
Relative notation may be used.
.TP
.B \-\-balancing, \-b
Enable Linux kernel NUMA balancing for the process if it is supported by kernel.
This should only be used together with
.I \-\-membind, \-m
and is ignored otherwise.
.TP
.B \-\-show, \-s
Show NUMA policy settings of the current process.
.TP
.B \-\-hardware, \-H
Show inventory of available nodes on the system.
.TP 0
Numactl can set up policy for a SYSV shared memory segment or a file in shmfs/hugetlbfs.
This policy is persistent and will be used by
all mappings from that shared memory. The order of options matters here.
The specification must at least include either of
.I \-\-shm,
.I \-\-shmid,
.I \-\-file
to specify the shared memory segment or file and a memory policy like described
above (
.I \-\-interleave,
.I \-\-localalloc,
.I \-\-preferred,
.I \-\-membind
).
.TP
.B \-\-huge
When creating a SYSV shared memory segment use huge pages.
Only valid before \-\-shmid or \-\-shm
.TP
.B \-\-offset
Specify offset into the shared memory segment. Default 0.
Valid units are
.I m
(for MB),
.I g
(for GB),
.I k
(for KB),
otherwise it specifies bytes.
.TP
.B \-\-strict
Give an error when a page in the policied area in the shared memory
segment already was faulted in with a conflicting policy. Default
is to silently ignore this.
.TP
.B \-\-shmmode shmmode
Only valid before \-\-shmid or \-\-shm
When creating a shared memory segment set it to numeric mode
.I shmmode.
.TP
.B \-\-length length
Apply policy to
.I length
range in the shared memory segment or make
the segment length long.
Default is to use the remaining length.
Required when a shared memory segment is created and specifies the length
of the new segment then. Valid units are
.I m
(for MB),
.I g
(for GB),
.I k
(for KB),
otherwise it specifies bytes.
.TP
.B \-\-shmid id
Create or use a shared memory segment with numeric ID
.I id
.TP
.B \-\-shm shmkeyfile
Create or use a shared memory segment, with the ID generated
using
.I ftok(3)
from shmkeyfile
.TP
.B \-\-file tmpfsfile
Set policy for a file in tmpfs or hugetlbfs
.TP
.B \-\-touch
Touch pages to enforce policy early. Default is to not touch them; the policy
is applied when an application maps and accesses a page.
.TP
.B \-\-dump
Dump policy in the specified range.
.TP
.B \-\-dump-nodes
Dump all nodes of the specific range (very verbose!)
.TP
Valid node specifiers
.TS
tab(:);
l l.
all:All nodes
number:Node number
number1{,number2}:Node number1 and Node number2
number1-number2:Nodes from number1 to number2
! nodes:Invert selection of the following specification.
.TE
.SH EXAMPLES
numactl \-\-physcpubind=+0-4,8-12 myapplic arguments
Run myapplic on cpus 0-4 and 8-12 of the current cpuset.
numactl \-\-interleave=all bigdatabase arguments
Run big database with its memory interleaved on all nodes.
numactl \-\-cpunodebind=0 \-\-membind=0,1 process
Run process on node 0 with memory allocated on node 0 and 1.
numactl \-\-cpunodebind=0 \-\-membind=0,1 -- process -l
Run process as above, but with an option (-l) that would be confused with
a numactl option.
numactl \-\-cpunodebind=0 \-\-balancing \-\-membind=0,1 process
Run process on node 0 with memory allocated on node 0 and 1. Optimize the
page placement with Linux kernel NUMA balancing mechanism if possible.
numactl \-\-cpunodebind=netdev:eth0 \-\-membind=netdev:eth0 network-server
Run network-server on the node of network device eth0 with its memory
also in the same node.
numactl \-\-preferred=1 numactl \-\-show
Set preferred node 1 and show the resulting state.
numactl --shm /tmp/shmkey --interleave=all
Interleave all of the sysv shared memory region specified by
/tmp/shmkey over all nodes.
Place a tmpfs file on 2 nodes:
numactl --membind=2 dd if=/dev/zero of=/dev/shm/A bs=1M count=1024
numactl --membind=3 dd if=/dev/zero of=/dev/shm/A seek=1024 bs=1M count=1024
numactl --localalloc /dev/shm/file
Reset the policy for the shared memory file
.I file
to the default localalloc policy.
.SH NOTES
Requires a NUMA policy aware kernel.
Command is not executed using a shell. If you want to use shell metacharacters
in the child use sh -c as wrapper.
Setting policy for a hugetlbfs file does currently not work because
it cannot be extended by truncate.
Shared memory segments larger than numactl's address space cannot
be completely policied. This could be a problem on 32bit architectures.
Changing it piece by piece may work.
The old
.I --cpubind
which accepts node numbers, not cpu numbers, is deprecated
and replaced with the new
.I --cpunodebind
and
.I --physcpubind
options.
.SH FILES
.I /proc/cpuinfo
for the listing of active CPUs. See
.I proc(5)
for details.
.I /sys/devices/system/node/node*/numastat
for NUMA memory hit statistics.
.SH COPYRIGHT
Copyright 2002,2004 Andi Kleen, SuSE Labs.
numactl and the demo programs are under the GNU General Public License, v.2
.SH SEE ALSO
.I set_mempolicy(2)
,
.I get_mempolicy(2)
,
.I mbind(2)
,
.I sched_setaffinity(2)
,
.I sched_getaffinity(2)
,
.I proc(5)
,
.I ftok(3)
,
.I shmat(2)
,
.I migratepages(8)
07070100000022000081ED00003EA600000032000000016127204300003B75000000000000000000000000000000000000002500000000numactl-2.0.14.20.g4ee5e0c/numactl.c/* Copyright (C) 2003,2004,2005 Andi Kleen, SuSE Labs.
Command line NUMA policy control.
numactl is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; version
2.
numactl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should find a copy of v2 of the GNU General Public License somewhere
on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#define _GNU_SOURCE
#include <getopt.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <ctype.h>
#include <assert.h>
#include "numa.h"
#include "numaif.h"
#include "numaint.h"
#include "util.h"
#include "shm.h"
#define CPUSET 0
#define ALL 1
int exitcode;
struct option opts[] = {
{"all", 0, 0, 'a'},
{"interleave", 1, 0, 'i' },
{"preferred", 1, 0, 'p' },
{"cpubind", 1, 0, 'c' },
{"cpunodebind", 1, 0, 'N' },
{"physcpubind", 1, 0, 'C' },
{"membind", 1, 0, 'm'},
{"show", 0, 0, 's' },
{"localalloc", 0,0, 'l'},
{"balancing", 0, 0, 'b'},
{"hardware", 0,0,'H' },
{"shm", 1, 0, 'S'},
{"file", 1, 0, 'f'},
{"offset", 1, 0, 'o'},
{"length", 1, 0, 'L'},
{"strict", 0, 0, 't'},
{"shmmode", 1, 0, 'M'},
{"dump", 0, 0, 'd'},
{"dump-nodes", 0, 0, 'D'},
{"shmid", 1, 0, 'I'},
{"huge", 0, 0, 'u'},
{"touch", 0, 0, 'T'},
{"verify", 0, 0, 'V'}, /* undocumented - for debugging */
{ 0 }
};
/* Print the command line synopsis to stderr and terminate with exit status 1. */
void usage(void)
{
fprintf(stderr,
"usage: numactl [--all | -a] [--balancing | -b] [--interleave= | -i <nodes>]\n"
" [--preferred= | -p <node>] [--physcpubind= | -C <cpus>]\n"
" [--cpunodebind= | -N <nodes>] [--membind= | -m <nodes>]\n"
" [--localalloc | -l] command args ...\n"
" numactl [--show | -s]\n"
" numactl [--hardware | -H]\n"
" numactl [--length | -L <length>] [--offset | -o <offset>] [--shmmode | -M <shmmode>]\n"
" [--strict | -t]\n"
" [--shmid | -I <id>] --shm | -S <shmkeyfile>\n"
" [--shmid | -I <id>] --file | -f <tmpfsfile>\n"
" [--huge | -u] [--touch | -T] \n"
" memory policy [--dump | -d] [--dump-nodes | -D]\n"
"\n"
"memory policy is --interleave | -i, --preferred | -p, --membind | -m, --localalloc | -l\n"
"<nodes> is a comma delimited list of node numbers or A-B ranges or all.\n"
"Instead of a number a node can also be:\n"
" netdev:DEV the node connected to network device DEV\n"
" file:PATH the node the block device of path is connected to\n"
" ip:HOST the node of the network device host routes through\n"
" block:PATH the node of block device path\n"
" pci:[seg:]bus:dev[:func] The node of a PCI device\n"
"<cpus> is a comma delimited list of cpu numbers or A-B ranges or all\n"
"all ranges can be inverted with !\n"
"all numbers and ranges can be made cpuset-relative with +\n"
"the old --cpubind argument is deprecated.\n"
"use --cpunodebind or --physcpubind instead\n"
"use --balancing | -b to enable Linux kernel NUMA balancing\n"
"for the process if it is supported by kernel\n"
"<length> can have g (GB), m (MB) or k (KB) suffixes\n");
/* usage() never returns to its caller */
exit(1);
}
/*
 * Print "numactl: " followed by a printf-style message and a newline to
 * stderr, then print the usage text and exit via usage().
 *
 * msg: printf-style format string; the remaining arguments are its values.
 *
 * The terminating newline is written to stderr like the rest of the
 * message (it used to go to stdout), and the va_list is released before
 * usage() is called because usage() does not return.
 */
void usage_msg(char *msg, ...)
{
	va_list ap;

	va_start(ap, msg);
	fprintf(stderr, "numactl: ");
	vfprintf(stderr, msg, ap);
	va_end(ap);
	fputc('\n', stderr);
	usage();
}
/*
 * Print the CPU affinity mask of the current task as "physcpubind".
 *
 * Starts with a bitmask of numa_num_configured_cpus() bits. While
 * numa_sched_getaffinity() fails with EINVAL and the size is still below
 * 1024*1024 bits, the size is doubled and the call retried. Any other
 * failure is reported through err().
 *
 * Every bitmask allocated here is released again, including the ones
 * discarded on a retry.
 */
void show_physcpubind(void)
{
	int ncpus = numa_num_configured_cpus();

	for (;;) {
		struct bitmask *cpubuf;

		cpubuf = numa_bitmask_alloc(ncpus);
		if (numa_sched_getaffinity(0, cpubuf) < 0) {
			if (errno == EINVAL && ncpus < 1024*1024) {
				/* mask too small: drop it and retry with twice the bits */
				numa_bitmask_free(cpubuf);
				ncpus *= 2;
				continue;
			}
			err("sched_get_affinity");
		}
		printmask("physcpubind", cpubuf);
		numa_bitmask_free(cpubuf);
		break;
	}
}
/*
 * Print the NUMA policy state of the current process: policy name,
 * preferred node, interleave information (for MPOL_INTERLEAVE only),
 * CPU affinity, node binding and memory binding.
 * When numa_available() reports no NUMA support, only the CPU affinity is
 * printed and the process exits with status 1.
 */
void show(void)
{
unsigned long prefnode;
struct bitmask *membind, *interleave, *cpubind;
unsigned long cur;
int policy;
if (numa_available() < 0) {
show_physcpubind();
printf("No NUMA support available on this system.\n");
exit(1);
}
/* Collect all state up front; the printing below only formats it. */
cpubind = numa_get_run_node_mask();
prefnode = numa_preferred();
interleave = numa_get_interleave_mask();
membind = numa_get_membind();
cur = numa_get_interleave_node();
policy = 0;
/* On failure the error is reported and policy stays at the 0 set above. */
if (get_mempolicy(&policy, NULL, 0, 0, 0) < 0)
perror("get_mempolicy");
printf("policy: %s\n", policy_name(policy));
printf("preferred node: ");
/* The meaning of "preferred node" depends on the active policy. */
switch (policy) {
case MPOL_PREFERRED:
assert(prefnode != -1);
printf("%ld\n", prefnode);
break;
case MPOL_DEFAULT:
printf("current\n");
break;
case MPOL_INTERLEAVE:
printf("%ld (interleave next)\n",cur);
break;
case MPOL_BIND:
printf("%d\n", find_first(membind));
break;
}
if (policy == MPOL_INTERLEAVE) {
printmask("interleavemask", interleave);
printf("interleavenode: %ld\n", cur);
}
show_physcpubind();
printmask("cpubind", cpubind); // for compatibility
printmask("nodebind", cpubind);
printmask("membind", membind);
}
/*
 * Format a memory size for display.
 *
 * mem: size in bytes, or -1 (all bits set) when the size is unknown.
 * buf: caller-supplied output buffer.
 *
 * Writes "<not available>" for the unknown marker, otherwise the size in
 * whole megabytes followed by " MB". Returns buf.
 */
char *fmt_mem(unsigned long long mem, char *buf)
{
	const unsigned long long unknown = (unsigned long long)-1L;

	if (mem != unknown) {
		unsigned long long megabytes = mem >> 20;

		sprintf(buf, "%llu MB", megabytes);
		return buf;
	}
	sprintf(buf, "<not available>");
	return buf;
}
/*
 * Print the node distance matrix for all nodes set in numa_nodes_ptr,
 * looking at node numbers 0..maxnode.
 *
 * If numa_distance() between maxnode and the first present node is 0, a
 * "not available" note is printed instead of the table.
 */
static void print_distances(int maxnode)
{
	int row, col;
	int first = 0;

	/* locate the lowest node number that is present */
	for (row = 0; row <= maxnode; row++) {
		if (!numa_bitmask_isbitset(numa_nodes_ptr, row))
			continue;
		first = row;
		break;
	}

	if (numa_distance(maxnode, first) == 0) {
		printf("No distance information available.\n");
		return;
	}

	/* header line: one column per present node */
	printf("node distances:\n");
	printf("node ");
	for (col = 0; col <= maxnode; col++) {
		if (numa_bitmask_isbitset(numa_nodes_ptr, col))
			printf("% 3d ", col);
	}
	printf("\n");

	/* one row per present node */
	for (row = 0; row <= maxnode; row++) {
		if (!numa_bitmask_isbitset(numa_nodes_ptr, row))
			continue;
		printf("% 3d: ", row);
		for (col = 0; col <= maxnode; col++) {
			if (!numa_bitmask_isbitset(numa_nodes_ptr, row))
				continue;
			if (!numa_bitmask_isbitset(numa_nodes_ptr, col))
				continue;
			printf("% 3d ", numa_distance(row, col));
		}
		printf("\n");
	}
}
/*
 * Print the CPU numbers belonging to a node as " N" items followed by a
 * newline. If numa_node_to_cpus() fails only the newline is printed.
 *
 * node: node number to look up.
 *
 * The temporary cpumask is released before returning; this function is
 * called once per node, so the mask would otherwise leak on each call.
 */
void print_node_cpus(int node)
{
	int i, rc;
	struct bitmask *cpus;

	cpus = numa_allocate_cpumask();
	rc = numa_node_to_cpus(node, cpus);
	if (rc >= 0) {
		for (i = 0; i < cpus->size; i++)
			if (numa_bitmask_isbitset(cpus, i))
				printf(" %d", i);
	}
	putchar('\n');
	numa_free_cpumask(cpus);
}
/*
 * Print the node inventory: the list of present nodes in compact range
 * notation, then per node its CPUs, size and free memory, and finally
 * the distance matrix.
 *
 * Exits with status 1 when numa_available() reports no NUMA support.
 * numa_max_node() is only called after that check, and
 * numa_node_size64() is only called for nodes set in numa_nodes_ptr.
 */
void hardware(void)
{
	int i;
	int numnodes=0;
	int prevnode=-1;
	int skip=0;
	int maxnode;

	if (numa_available() < 0) {
		printf("No NUMA available on this system\n");
		exit(1);
	}
	maxnode = numa_max_node();

	for (i=0; i<=maxnode; i++)
		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
			numnodes++;
	printf("available: %d nodes (", numnodes);

	/*
	 * Emit the node list as ranges, e.g. "0-3,8". prevnode is the last
	 * present node seen; skip is set while inside a run of consecutive
	 * nodes whose end has not been printed yet.
	 */
	for (i=0; i<=maxnode; i++) {
		if (numa_bitmask_isbitset(numa_nodes_ptr, i)) {
			if (prevnode == -1) {
				/* first present node */
				printf("%d", i);
				prevnode=i;
				continue;
			}

			if (i > prevnode + 1) {
				/* gap: close an open run, then start a new item */
				if (skip) {
					printf("%d", prevnode);
					skip=0;
				}
				printf(",%d", i);
				prevnode=i;
				continue;
			}

			if (i == prevnode + 1) {
				/* consecutive node: open a run if none is open */
				if (!skip) {
					printf("-");
					skip=1;
				}
				prevnode=i;
			}

			/* a run reaching the last node must be closed here */
			if ((i == maxnode) && skip)
				printf("%d", prevnode);
		}
	}
	printf(")\n");

	for (i = 0; i <= maxnode; i++) {
		char buf[64];
		long long fr;
		unsigned long long sz;

		if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
			continue;

		sz = numa_node_size64(i, &fr);
		printf("node %d cpus:", i);
		print_node_cpus(i);
		printf("node %d size: %s\n", i, fmt_mem(sz, buf));
		printf("node %d free: %s\n", i, fmt_mem(fr, buf));
	}
	print_distances(maxnode);
}
/*
 * If errno is non-zero, report it with perror() using s as the prefix
 * and exit with status 1. Returns normally when errno is 0.
 */
void checkerror(char *s)
{
	if (errno == 0)
		return;

	perror(s);
	exit(1);
}
/*
 * Complain unless numa_available() reports NUMA support. The probe is
 * only made on the first call; the static flag is cleared afterwards so
 * later calls skip it.
 */
void checknuma(void)
{
	static int numa = -1;

	if (numa < 0 && numa_available() < 0)
		complain("This system does not support NUMA policy");
	numa = 0;
}
/* Memory policy chosen on the command line, or -1 while none was given. */
int set_policy = -1;

/*
 * Record pol as the selected memory policy. A usage error is raised when
 * a policy has already been recorded.
 */
void setpolicy(int pol)
{
	int already_chosen = (set_policy != -1);

	if (already_chosen)
		usage_msg("Conflicting policies");
	set_policy = pol;
}
/* Raise a usage error if a memory policy has already been recorded. */
void nopolicy(void)
{
	if (set_policy < 0)
		return;
	usage_msg("specify policy after --shm/--file");
}
int did_cpubind = 0;
int did_strict = 0;
int do_shm = 0;
int do_dump = 0;
int shmattached = 0;
int did_node_cpu_parse = 0;
int parse_all = 0;
int numa_balancing = 0;
char *shmoption;
/* Raise a usage error when flag is non-zero; does nothing otherwise. */
void check_cpubind(int flag)
{
	if (!flag)
		return;
	usage_msg("cannot do --cpubind on shared memory\n");
}
/*
 * Called for options that must precede the shared memory specification.
 * Raises a usage error when a segment is already attached, and remembers
 * opt in shmoption.
 */
void noshm(char *opt)
{
	if (shmattached != 0)
		usage_msg("%s must be before shared memory specification", opt);

	shmoption = opt;
}
/*
 * Raise a usage error for opt when a shared memory option was recorded
 * in shmoption earlier on the command line.
 */
void dontshm(char *opt)
{
	if (shmoption == NULL)
		return;
	usage_msg("%s shm option is not allowed before %s", shmoption, opt);
}
/* Raise a usage error for opt unless a shared memory segment is attached. */
void needshm(char *opt)
{
	if (shmattached)
		return;
	usage_msg("%s must be after shared memory specification", opt);
}
/*
 * Raise a usage error when flag is non-zero, i.e. when a cpu/node
 * specification was already parsed before --all/-a.
 *
 * flag: the caller's "a node or cpu string has been parsed" indicator.
 *
 * The parameter is now honoured, matching check_cpubind(); it used to be
 * ignored in favour of reading did_node_cpu_parse directly.
 */
void check_all_parse(int flag)
{
	if (flag)
		usage_msg("--all/-a option must be before all cpu/node specifications");
}
/*
 * Build a getopt short-option string from a long-option table.
 *
 * o: table terminated by an entry whose name is NULL.
 * s: output buffer; receives a leading '+', then for every entry with a
 *    printable val that character, followed by ':' when has_arg is
 *    non-zero. The result is NUL terminated.
 */
void get_short_opts(struct option *o, char *s)
{
	struct option *entry;
	char *out = s;

	*out++ = '+';
	for (entry = o; entry->name; entry++) {
		if (!isprint(entry->val))
			continue;
		*out++ = entry->val;
		if (entry->has_arg)
			*out++ = ':';
	}
	*out = '\0';
}
/*
 * Exit with status 1 and an error on stderr when shmoffset is not below
 * shmlen. msg names the option being processed and ends the message.
 */
void check_shmbeyond(char *msg)
{
	if (shmoffset < shmlen)
		return;

	fprintf(stderr,
		"numactl: region offset %#llx beyond its length %#llx at %s\n",
		shmoffset, shmlen, msg);
	exit(1);
}
/*
 * Parse a node string, supporting the keyword "same".
 *
 * s:    node specification; "same" reuses the string from the previous
 *       call and is a usage error when there was none.
 * flag: ALL selects numa_parse_nodestring_all(); any other value selects
 *       numa_parse_nodestring().
 *
 * Returns whatever the selected libnuma parser returns.
 */
static struct bitmask *numactl_parse_nodestring(char *s, int flag)
{
	static char *last;
	int wants_previous = (strcmp(s, "same") == 0);

	if (!wants_previous) {
		/* remember this specification for a later "same" */
		last = s;
	} else {
		if (last == NULL)
			usage_msg("same needs previous node specification");
		s = last;
	}

	return (flag == ALL) ? numa_parse_nodestring_all(s)
			     : numa_parse_nodestring(s);
}
/*
 * numactl entry point.
 *
 * Options are processed strictly in command line order and take effect
 * immediately, so their order matters. Three modes exist:
 *   - --show / --hardware print information and exit;
 *   - with --shm / --file a policy is applied to the attached segment and
 *     the program exits with exitcode;
 *   - otherwise the policy is applied to this process and the remaining
 *     arguments are executed with execvp().
 *
 * The cpu bitmask of --physcpubind is released with numa_bitmask_free()
 * rather than free(), which would only release the header.
 */
int main(int ac, char **av)
{
	int c, i, nnodes=0;
	long node=-1;
	char *end;
	char shortopts[array_len(opts)*2 + 1];
	struct bitmask *mask = NULL;

	get_short_opts(opts,shortopts);
	while ((c = getopt_long(ac, av, shortopts, opts, NULL)) != -1) {
		switch (c) {
		case 's': /* --show */
			show();
			exit(0);
		case 'H': /* --hardware */
			nopolicy();
			hardware();
			exit(0);
		case 'b': /* --balancing */
			nopolicy();
			/* only consulted by a later --membind */
			numa_balancing = 1;
			break;
		case 'i': /* --interleave */
			checknuma();
			if (parse_all)
				mask = numactl_parse_nodestring(optarg, ALL);
			else
				mask = numactl_parse_nodestring(optarg, CPUSET);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}

			/* clear errno so checkerror() only sees this operation */
			errno = 0;
			did_node_cpu_parse = 1;
			setpolicy(MPOL_INTERLEAVE);
			if (shmfd >= 0)
				numa_interleave_memory(shmptr, shmlen, mask);
			else
				numa_set_interleave_mask(mask);
			checkerror("setting interleave mask");
			break;
		case 'N': /* --cpunodebind */
		case 'c': /* --cpubind */
			dontshm("-c/--cpubind/--cpunodebind");
			checknuma();
			if (parse_all)
				mask = numactl_parse_nodestring(optarg, ALL);
			else
				mask = numactl_parse_nodestring(optarg, CPUSET);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			check_cpubind(do_shm);
			did_cpubind = 1;
			did_node_cpu_parse = 1;
			numa_run_on_node_mask_all(mask);
			checkerror("sched_setaffinity");
			break;
		case 'C': /* --physcpubind */
		{
			struct bitmask *cpubuf;
			dontshm("-C/--physcpubind");
			if (parse_all)
				cpubuf = numa_parse_cpustring_all(optarg);
			else
				cpubuf = numa_parse_cpustring(optarg);
			if (!cpubuf) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			check_cpubind(do_shm);
			did_cpubind = 1;
			did_node_cpu_parse = 1;
			numa_sched_setaffinity(0, cpubuf);
			checkerror("sched_setaffinity");
			/* a struct bitmask owns a separate maskp buffer */
			numa_bitmask_free(cpubuf);
			break;
		}
		case 'm': /* --membind */
			checknuma();
			setpolicy(MPOL_BIND);
			if (parse_all)
				mask = numactl_parse_nodestring(optarg, ALL);
			else
				mask = numactl_parse_nodestring(optarg, CPUSET);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			did_node_cpu_parse = 1;
			/* strict binding only for the duration of this call */
			numa_set_bind_policy(1);
			if (shmfd >= 0) {
				numa_tonodemask_memory(shmptr, shmlen, mask);
			} else if (numa_balancing) {
				numa_set_membind_balancing(mask);
			} else {
				numa_set_membind(mask);
			}
			numa_set_bind_policy(0);
			checkerror("setting membind");
			break;
		case 'p': /* --preferred */
			checknuma();
			setpolicy(MPOL_PREFERRED);
			if (parse_all)
				mask = numactl_parse_nodestring(optarg, ALL);
			else
				mask = numactl_parse_nodestring(optarg, CPUSET);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			/* exactly one node must be given */
			for (i=0; i<mask->size; i++) {
				if (numa_bitmask_isbitset(mask, i)) {
					node = i;
					nnodes++;
				}
			}
			if (nnodes != 1)
				usage();
			errno = 0;
			did_node_cpu_parse = 1;
			numa_set_bind_policy(0);
			if (shmfd >= 0)
				numa_tonode_memory(shmptr, shmlen, node);
			else
				numa_set_preferred(node);
			checkerror("setting preferred node");
			break;
		case 'l': /* --local */
			checknuma();
			setpolicy(MPOL_DEFAULT);
			errno = 0;
			if (shmfd >= 0)
				numa_setlocal_memory(shmptr, shmlen);
			else
				numa_set_localalloc();
			checkerror("local allocation");
			break;
		case 'S': /* --shm */
			check_cpubind(did_cpubind);
			nopolicy();
			attach_sysvshm(optarg, "--shm");
			shmattached = 1;
			break;
		case 'f': /* --file */
			check_cpubind(did_cpubind);
			nopolicy();
			attach_shared(optarg, "--file");
			shmattached = 1;
			break;
		case 'L': /* --length */
			noshm("--length");
			shmlen = memsize(optarg);
			break;
		case 'M': /* --shmmode */
			noshm("--shmmode");
			/* the mode is parsed as an octal number */
			shmmode = strtoul(optarg, &end, 8);
			if (end == optarg || *end)
				usage();
			break;
		case 'd': /* --dump */
			if (shmfd < 0)
				complain(
				"Cannot do --dump without shared memory.\n");
			dump_shm();
			do_dump = 1;
			break;
		case 'D': /* --dump-nodes */
			if (shmfd < 0)
				complain(
			    "Cannot do --dump-nodes without shared memory.\n");
			dump_shm_nodes();
			do_dump = 1;
			break;
		case 't': /* --strict */
			did_strict = 1;
			numa_set_strict(1);
			break;
		case 'I': /* --shmid */
			shmid = strtoul(optarg, &end, 0);
			if (end == optarg || *end)
				usage();
			break;

		case 'u': /* --huge */
			noshm("--huge");
			shmflags |= SHM_HUGETLB;
			break;

		case 'o':  /* --offset */
			noshm("--offset");
			shmoffset = memsize(optarg);
			break;

		case 'T': /* --touch */
			needshm("--touch");
			check_shmbeyond("--touch");
			numa_police_memory(shmptr, shmlen);
			break;

		case 'V': /* --verify */
			needshm("--verify");
			if (set_policy < 0)
				complain("Need a policy first to verify");
			check_shmbeyond("--verify");
			numa_police_memory(shmptr, shmlen);
			if (!mask)
				complain("Need a mask to verify");
			else
				verify_shm(set_policy, mask);
			break;

		case 'a': /* --all */
			check_all_parse(did_node_cpu_parse);
			parse_all = 1;
			break;
		default:
			usage();
		}
	}

	av += optind;
	ac -= optind;

	/* shared memory mode: no command may follow */
	if (shmfd >= 0) {
		if (*av)
			usage();
		exit(exitcode);
	}

	/* process mode: reject or warn about shared-memory-only options */
	if (did_strict)
		fprintf(stderr,
			"numactl: warning. Strict flag for process ignored.\n");

	if (do_dump)
		usage_msg("cannot do --dump|--dump-shm for process");

	if (shmoption)
		usage_msg("shm related option %s for process", shmoption);

	if (*av == NULL)
		usage();
	execvp(*av, av);

	/* only reached when execvp() failed */
	complain("execution of `%s': %s\n", av[0], strerror(errno));
	return 0; /* not reached */
}
07070100000023000081A400003EA6000000320000000161272043000007BE000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/numactl.specName: numactl
Summary: Library for tuning for Non Uniform Memory Access machines
Version: 2.0.12
Release: 3%{dist}
# libnuma is LGPLv2 and GPLv2
# numactl binaries are GPLv2 only
License: GPLv2
URL: https://github.com/numactl/numactl
Source0: https://github.com/numactl/numactl/releases/download/v2.0.12/numactl-2.0.12.tar.gz
BuildRequires: libtool automake autoconf
ExcludeArch: s390 %{arm}
%description
Simple NUMA policy support. It consists of a numactl program to run
other programs with a specific NUMA policy.
%package libs
Summary: libnuma libraries
# There is a tiny bit of GPLv2 code in libnuma.c
License: LGPLv2 and GPLv2
%description libs
numactl-libs provides libnuma, a library to do allocations with
NUMA policy in applications.
%package devel
Summary: Development package for building Applications that use numa
Requires: %{name}-libs = %{version}-%{release}
License: LGPLv2 and GPLv2
%description devel
Provides development headers for numa library calls
%prep
%setup -q -n %{name}-%{version}
%build
%configure --prefix=/usr --libdir=%{_libdir}
# Using recipe to fix rpaths, from here:
# https://fedoraproject.org/wiki/RPath_Packaging_Draft#Removing_Rpath
sed -i -e 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' \
-e 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
make clean
make CFLAGS="$RPM_OPT_FLAGS -I."
%install
rm -rf $RPM_BUILD_ROOT
make DESTDIR=$RPM_BUILD_ROOT install
%ldconfig_scriptlets
%ldconfig_scriptlets libs
%files
%doc README.md
%{_bindir}/numactl
%{_bindir}/numademo
%{_bindir}/numastat
%{_bindir}/memhog
%{_bindir}/migspeed
%{_bindir}/migratepages
%{_mandir}/man8/*.8*
%exclude %{_mandir}/man2/*.2*
%files libs
%{_libdir}/libnuma.so.1.0.0
%{_libdir}/libnuma.so.1
%files devel
%{_libdir}/libnuma.so
%exclude %{_libdir}/libnuma.a
%exclude %{_libdir}/libnuma.la
%{_libdir}/pkgconfig/numa.pc
%{_includedir}/numa.h
%{_includedir}/numaif.h
%{_includedir}/numacompat1.h
%{_mandir}/man3/*.3*
07070100000024000081A400003EA6000000320000000161272043000033A3000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/numademo.c/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
Test/demo program for libnuma. This is also a more or less useful benchmark
of the NUMA characteristics of your machine. It benchmarks most possible
NUMA policy memory configurations with various benchmarks.
Compile standalone with cc -O2 numademo.c -o numademo -lnuma -lm
numactl is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; version
2.
numactl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should find a copy of v2 of the GNU General Public License somewhere
on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#define _GNU_SOURCE 1
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/time.h>
#include "numa.h"
#ifdef HAVE_STREAM_LIB
#include "stream_lib.h"
#endif
#ifdef HAVE_MT
#include "mt.h"
#endif
#ifdef HAVE_CLEAR_CACHE
#include "clearcache.h"
#else
/* No-op stand-in used when HAVE_CLEAR_CACHE is undefined and clearcache.h
   is therefore not included; both arguments are ignored. */
static inline void clearcache(void *a, unsigned size) {}
#endif
#define FRACT_NODES 8
#define FRACT_MASKS 32
int fract_nodes;
int *node_to_use;
unsigned long msize;
/* Should get this from cpuinfo, but on !x86 it's not there */
/* Byte stride used by the FORWARD and BACKWARD kernels in memtest(). */
enum {
CACHELINESIZE = 64,
};
/*
 * Benchmark kernels.  The enumerator values are used directly as indices
 * into testname[] (see memtest() and main()), so the optional kernels must
 * be compiled in or out under exactly the same conditions as their names
 * in that array.  Without these conditionals a build lacking
 * HAVE_STREAM_LIB or HAVE_MT would index past the end of testname[] for
 * PTRCHASE and run the wrong kernel when a test is selected by name.
 * Every use of STREAM and RANDOM2 in this file is already guarded by the
 * corresponding #ifdef.
 */
enum test {
    MEMSET = 0,
    MEMCPY,
    FORWARD,
    BACKWARD,
#ifdef HAVE_STREAM_LIB
    STREAM,
#endif
#ifdef HAVE_MT
    RANDOM2,
#endif
    PTRCHASE,
} thistest;
char *delim = " ";
int force;
int regression_testing=0;
/*
 * NULL-terminated list of test names.  main() matches command-line
 * arguments against it and passes the matching index to test(); memtest()
 * and test() index it with thistest to label their output.
 * "stream" and "random2" are present only when HAVE_STREAM_LIB / HAVE_MT
 * are defined.
 * NOTE(review): because the array is indexed by enum test values, the two
 * must list the same entries in the same order - verify for builds where
 * either macro is undefined.
 */
char *testname[] = {
"memset",
"memcpy",
"forward",
"backward",
#ifdef HAVE_STREAM_LIB
"stream",
#endif
#ifdef HAVE_MT
"random2",
#endif
"ptrchase",
NULL,
};
/*
 * Print one result line.  With a whitespace delimiter the title is padded
 * to a 42-column field; otherwise title and result are joined by the
 * delimiter string (CSV mode).
 */
void output(char *title, char *result)
{
    if (isspace(delim[0])) {
        printf("%-42s%s\n", title, result);
        return;
    }
    printf("%s%s%s\n", title,delim, result);
}
#ifdef HAVE_STREAM_LIB
/*
 * Run the STREAM benchmark on mem and print one result line.
 *
 * name - label of the memory policy under test (may be several hundred
 *        characters long when it lists interleave nodes)
 * mem  - buffer of msize bytes to benchmark
 *
 * All string building is bounded: the original sprintf()/strcat() calls
 * could overrun the fixed-size buffers for long names or large results.
 */
void do_stream(char *name, unsigned char *mem)
{
    int i;
    char title[640], buf[256];
    double res[STREAM_NRESULTS];
    size_t used = 0;

    stream_verbose = 0;
    clearcache(mem, msize);
    stream_init(mem);
    stream_test(res);
    snprintf(title, sizeof(title), "%s%s%s", name, delim, "STREAM");
    buf[0] = '\0';
    for (i = 0; i < STREAM_NRESULTS; i++) {
        int n;

        if (used >= sizeof(buf) - 1)
            break;      /* buffer full: drop the remaining results */
        /* A delimiter separates consecutive results, not the first one. */
        n = snprintf(buf + used, sizeof(buf) - used, "%s%s%s%.2f%sMB/s",
                 used ? delim : "",
                 stream_names[i], delim, res[i], delim);
        if (n < 0)
            break;
        used += (size_t)n;
        if (used > sizeof(buf) - 1)
            used = sizeof(buf) - 1;     /* output was truncated */
    }
    output(title, buf);
    clearcache(mem, msize);
}
#endif
/* Set up a randomly distributed list to fool prefetchers */
/*
 * One element of the pointer-chase list built by ptrchase_init().
 * While the list is being set up the nexti/val pair is used: val holds a
 * random sort key and nexti an index into the node array.  Afterwards the
 * same storage is overwritten with the next pointer that the benchmark
 * follows.
 */
union node {
union node *next;
struct {
unsigned nexti;
unsigned val;
};
};
static int cmp_node(const void *ap, const void *bp)
{
union node *a = (union node *)ap;
union node *b = (union node *)bp;
return a->val - b->val;
}
/*
 * Turn mem (msize bytes) into a linked list for the pointer-chase test.
 * Every node gets a pseudo-random key (fixed seed 1234) and the index of
 * its successor in address order; sorting by key then shuffles the nodes
 * in memory.  Finally each stored index is replaced by a real pointer,
 * with an index past the end becoming the NULL terminator.
 * Returns the address of the first node, which is where the chase starts.
 */
void **ptrchase_init(unsigned char *mem)
{
    union node *list = (union node *)mem;
    long count = msize / sizeof(union node);
    long idx;

    srand(1234);
    for (idx = 0; idx < count; idx++) {
        list[idx].val = rand();
        list[idx].nexti = idx + 1;
    }

    qsort(list, count, sizeof(union node), cmp_node);

    for (idx = 0; idx < count; idx++) {
        union node *cur = list + idx;

        /* nexti shares storage with next: read it before overwriting. */
        if (cur->nexti >= count)
            cur->next = NULL;
        else
            cur->next = &list[cur->nexti];
    }
    return (void **)list;
}
/* Collapse a struct timeval into a single count of microseconds. */
static inline unsigned long long timerfold(struct timeval *tv)
{
    unsigned long long usec = tv->tv_sec * 1000000ULL;

    usec += tv->tv_usec;
    return usec;
}
#define LOOPS 10
/*
 * Run the kernel selected by thistest on mem and print its throughput.
 *
 * name - label of the memory policy under test; callers pass strings of
 *        up to 511 characters
 * mem  - buffer of msize bytes, or NULL if the allocation failed
 *
 * The kernel runs LOOPS+1 times; pass 0 only faults the pages in and is
 * not timed.  Elapsed times are in microseconds, so msize/time is printed
 * as MB/s; the shortest time therefore yields the "Max" figure.
 * The buffer is released with numa_free() before returning, except when
 * mem is NULL.
 *
 * Changes from the original: the title buffer is large enough for any
 * caller-supplied name and all formatting is bounded, min starts at the
 * true maximum of its type, the RANDOM2 case ends in an explicit break,
 * and start/end are cleared so an unhandled test value cannot feed
 * uninitialised times into the statistics.
 */
void memtest(char *name, unsigned char *mem)
{
    long k;
    struct timeval start, end, res;
    unsigned long long max, min, sum, r;
    int i;
    char title[640], result[128];

    if (!mem) {
        fprintf(stderr,
        "Failed to allocate %lu bytes of memory. Test \"%s\" exits.\n",
            msize, name);
        return;
    }

#ifdef HAVE_STREAM_LIB
    if (thistest == STREAM) {
        do_stream(name, mem);
        goto out;
    }
#endif

    max = 0;
    min = ~0ULL;
    sum = 0;
    timerclear(&start);
    timerclear(&end);

    /*
     * Note: 0th pass allocates the pages, don't measure
     */
    for (i = 0; i < LOOPS+1; i++) {
        clearcache(mem, msize);
        switch (thistest) {
        case PTRCHASE:
        {
            void **ptr;

            ptr = ptrchase_init(mem);
            gettimeofday(&start,NULL);
            while (*ptr)
                ptr = (void **)*ptr;
            gettimeofday(&end,NULL);
            /* Side effect to trick the optimizer */
            *ptr = "bla";
            break;
        }

        case MEMSET:
            gettimeofday(&start,NULL);
            memset(mem, 0xff, msize);
            gettimeofday(&end,NULL);
            break;

        case MEMCPY:
            gettimeofday(&start,NULL);
            memcpy(mem, mem + msize/2, msize/2);
            gettimeofday(&end,NULL);
            break;

        case FORWARD:
            /* simple kernel to just fetch cachelines and write them back.
               will trigger hardware prefetch */
            gettimeofday(&start,NULL);
            for (k = 0; k < msize; k+=CACHELINESIZE)
                mem[k]++;
            gettimeofday(&end,NULL);
            break;

        case BACKWARD:
            gettimeofday(&start,NULL);
            for (k = msize-5; k > 0; k-=CACHELINESIZE)
                mem[k]--;
            gettimeofday(&end,NULL);
            break;

#ifdef HAVE_MT
        case RANDOM2:
        {
            unsigned * __restrict m = (unsigned *)mem;
            unsigned nwords = msize / sizeof(unsigned);
            unsigned mask;

            mt_init();
            /* Smallest all-ones mask that covers every word index. */
            mask = 1;
            while (mask < nwords)
                mask = (mask << 1) | 1;
            /*
             * There's no guarantee all memory is touched, but
             * we assume (hope) that the distribution of the MT
             * is good enough to touch most.
             */
            gettimeofday(&start,NULL);
            for (k = 0; k < nwords; k++) {
                unsigned idx = mt_random() & mask;

                if (idx >= nwords)
                    idx -= nwords;
                m[idx]++;
            }
            gettimeofday(&end,NULL);
            break;
        }
#endif
        default:
            break;
        }

        if (!i)
            continue;  /* don't count allocation pass */

        timersub(&end, &start, &res);
        r = timerfold(&res);
        if (r > max) max = r;
        if (r < min) min = r;
        sum += r;
    }
    snprintf(title, sizeof(title), "%s%s%s", name, delim, testname[thistest]);
#define H(t) (((double)msize) / ((double)t))
#define D3 delim,delim,delim
    snprintf(result, sizeof(result),
        "Avg%s%.2f%sMB/s%sMax%s%.2f%sMB/s%sMin%s%.2f%sMB/s",
        delim,
        H(sum/LOOPS),
        D3,
        H(min),
        D3,
        H(max),
        delim);
#undef H
#undef D3
    output(title,result);

#ifdef HAVE_STREAM_LIB
 out:
#endif
    /* Just to make sure that when we switch CPUs that the old guy
       doesn't still keep it around. */
    clearcache(mem, msize);

    numa_free(mem, msize);
}
/*
 * Return the number of set bits in val.
 * The value is shifted one bit at a time; the original shifted by an
 * ever-growing count, which reached the full word width (undefined
 * behaviour) whenever the top bit was set.
 */
int popcnt(unsigned long val)
{
    int cnt = 0;

    while (val) {
        cnt += (int)(val & 1UL);
        val >>= 1;
    }
    return cnt;
}
/* Highest node number and number of configured nodes, set by get_node_list(). */
int max_node, numnodes;

/*
 * Fill node_to_use[] with the numbers of all nodes that report a non-zero
 * size, scanning node numbers 0..numa_max_node().
 *
 * Returns the number of nodes found, or -1 if that differs from
 * numa_num_configured_nodes().  The array is allocated here and released
 * by the caller.
 *
 * The store into node_to_use[] is bounds-checked: the array holds only
 * numnodes entries, so finding more nodes with memory than are configured
 * is reported as a mismatch instead of writing past the allocation.
 */
int get_node_list(void)
{
    int a, got_nodes = 0;
    long long free_node_sizes;

    numnodes = numa_num_configured_nodes();
    node_to_use = (int *)malloc(numnodes * sizeof(int));
    if (!node_to_use && numnodes > 0) {
        perror("malloc");
        exit(1);
    }
    max_node = numa_max_node();
    for (a = 0; a <= max_node; a++) {
        if (numa_node_size(a, &free_node_sizes) <= 0)
            continue;
        if (got_nodes >= numnodes)
            return -1;  /* more nodes with memory than configured */
        node_to_use[got_nodes++] = a;
    }
    if (got_nodes != numnodes)
        return -1;
    return got_nodes;
}
/*
 * Run one benchmark kernel against every memory policy combination.
 *
 * type - kernel to run; stored in thistest for memtest()
 *
 * Sequence: default/local/interleaved allocations, allocation on each
 * node, interleaving over every multi-node mask, preferred-node policy,
 * manual interleave masks, and finally - with the task bound to each node
 * in turn - local, interleaved, membind and preferred allocations.
 * With regression_testing set only a fraction of nodes and masks is used.
 * Every buffer handed to memtest() is freed by memtest().
 *
 * Changes from the original: the "first two nodes" manual interleave step
 * now requires two nodes (it reads node_to_use[1]), matching the later
 * check, and the shifts that build and test node masks are guarded so they
 * never shift by the full width of unsigned long.
 */
void test(enum test type)
{
    unsigned long mask;
    unsigned long mask_limit;
    int i, k;
    char buf[512];
    struct bitmask *nodes;
    const int long_bits = (int)(sizeof(unsigned long) * 8);

    nodes = numa_allocate_nodemask();
    thistest = type;

    if (regression_testing) {
        printf("\nTest %s doing 1 of %d nodes and 1 of %d masks.\n",
            testname[thistest], fract_nodes, FRACT_MASKS);
    }

    memtest("memory with no policy", numa_alloc(msize));
    memtest("local memory", numa_alloc_local(msize));

    memtest("memory interleaved on all nodes", numa_alloc_interleaved(msize));
    for (i = 0; i < numnodes; i++) {
        if (regression_testing && (i % fract_nodes)) {
        /* for regression testing (-t) do only every eighth node */
            continue;
        }
        sprintf(buf, "memory on node %d", node_to_use[i]);
        memtest(buf, numa_alloc_onnode(msize, node_to_use[i]));
    }

    /* 1UL << numnodes is undefined once numnodes reaches the word width. */
    mask_limit = numnodes >= long_bits ? ~0UL : (1UL << numnodes);
    for (mask = 1, i = 0; mask < mask_limit; mask++, i++) {
        int w;
        char buf2[20];

        /* Single-node masks were already covered by the loop above. */
        if (popcnt(mask) == 1)
            continue;
        if (regression_testing && (i > 50)) {
            break;
        }
        if (regression_testing && (i % FRACT_MASKS)) {
        /* for regression testing (-t)
            do only every 32nd mask permutation */
            continue;
        }
        numa_bitmask_clearall(nodes);
        for (w = 0; mask >> w; w++) {
            if ((mask >> w) & 1)
                numa_bitmask_setbit(nodes, w);
        }

        sprintf(buf, "memory interleaved on");
        for (k = 0; k < numnodes; k++)
            if (node_to_use[k] < long_bits &&
                ((1UL<<node_to_use[k]) & mask)) {
                sprintf(buf2, " %d", node_to_use[k]);
                strcat(buf, buf2);
            }
        memtest(buf, numa_alloc_interleaved_subset(msize, nodes));
    }

    for (i = 0; i < numnodes; i++) {
        if (regression_testing && (node_to_use[i] % fract_nodes)) {
        /* for regression testing (-t) do only every eighth node */
            continue;
        }
        printf("setting preferred node to %d\n", node_to_use[i]);
        numa_set_preferred(node_to_use[i]);
        memtest("memory with preferred policy", numa_alloc(msize));
    }

    numa_set_interleave_mask(numa_all_nodes_ptr);
    memtest("manual interleaving to all nodes", numa_alloc(msize));

    if (numnodes >= 2) {
        numa_bitmask_clearall(nodes);
        numa_bitmask_setbit(nodes, node_to_use[0]);
        numa_bitmask_setbit(nodes, node_to_use[1]);
        numa_set_interleave_mask(nodes);
        memtest("manual interleaving on first two nodes", numa_alloc(msize));
        printf("current interleave node %d\n", numa_get_interleave_node());
    }

    numa_bitmask_free(nodes);

    numa_set_interleave_mask(numa_no_nodes_ptr);

    nodes = numa_allocate_nodemask();

    for (i = 0; i < numnodes; i++) {
        int oldhn = numa_preferred();

        if (regression_testing && (i % fract_nodes)) {
        /* for regression testing (-t) do only every eighth node */
            continue;
        }
        numa_run_on_node(node_to_use[i]);
        printf("running on node %d, preferred node %d\n",node_to_use[i], oldhn);

        memtest("local memory", numa_alloc_local(msize));

        memtest("memory interleaved on all nodes",
            numa_alloc_interleaved(msize));

        if (numnodes >= 2) {
            numa_bitmask_clearall(nodes);
            numa_bitmask_setbit(nodes, node_to_use[0]);
            numa_bitmask_setbit(nodes, node_to_use[1]);
            memtest("memory interleaved on first two nodes",
                numa_alloc_interleaved_subset(msize, nodes));
        }

        for (k = 0; k < numnodes; k++) {
            if (node_to_use[k] == node_to_use[i])
                continue;
            if (regression_testing && (node_to_use[k] % fract_nodes)) {
            /* for regression testing (-t)
                do only every eighth node */
                continue;
            }
            sprintf(buf, "alloc on node %d", node_to_use[k]);
            numa_bitmask_clearall(nodes);
            numa_bitmask_setbit(nodes, node_to_use[k]);
            numa_set_membind(nodes);
            memtest(buf, numa_alloc(msize));
            numa_set_membind(numa_all_nodes_ptr);
        }

        numa_set_localalloc();
        memtest("local allocation", numa_alloc(msize));

        numa_set_preferred(node_to_use[(i + 1) % numnodes]);
        memtest("setting wrong preferred node", numa_alloc(msize));
        numa_set_preferred(node_to_use[i]);
        memtest("setting correct preferred node", numa_alloc(msize));
        numa_set_preferred(-1);
        if (!delim[0])
            printf("\n\n\n");
    }
    numa_bitmask_free(nodes);
    /* numa_run_on_node_mask is not tested */
}
/* Print the command-line synopsis and the list of known tests, then exit
   with status 1. */
void usage(void)
{
    char **name;

    printf("usage: numademo [-S] [-f] [-c] [-e] [-t] msize[kmg] {tests}\nNo tests means run all.\n");
    printf("-c output CSV data. -f run even without NUMA API. -S run stupid tests. -e exit on error\n");
    printf("-t regression test; do not run all node combinations\n");
    printf("valid tests:");
    for (name = testname; *name != NULL; name++)
        printf(" %s", *name);
    putchar('\n');
    exit(1);
}
/* duplicated to make numademo standalone */
/*
 * Parse a size such as "64", "0x10k", "2M" or "1g".
 * The number is read with strtoul() (base auto-detected); a following
 * K, M or G in either case multiplies it by 1024 once, twice or three
 * times.  Any other trailing text is ignored.
 */
long memsize(char *s)
{
    char *suffix;
    long bytes = strtoul(s, &suffix, 0);
    int unit = toupper(*suffix);
    int steps = 0;

    if (unit == 'G')
        steps = 3;
    else if (unit == 'M')
        steps = 2;
    else if (unit == 'K')
        steps = 1;

    while (steps-- > 0)
        bytes *= 1024;
    return bytes;
}
/*
 * numademo entry point.
 *
 * Leading "-x" arguments are consumed as single-letter options (only the
 * character after the dash is examined).  The next argument is the buffer
 * size; any remaining arguments name the tests to run, and with none all
 * compiled-in tests are run.
 *
 * Exit status: 0 on success, 1 on usage or setup errors, 77 when fewer
 * than two nodes are available.
 *
 * Change from the original: sizeof values are printed with %zu rather
 * than %lu, which is only correct where size_t is unsigned long.
 */
int main(int ac, char **av)
{
    int simple_tests = 0;
    int nr_nodes;

    while (av[1] && av[1][0] == '-') {
        ac--;
        switch (av[1][1]) {
        case 'c':
            delim = ",";
            break;
        case 'f':
            force = 1;
            break;
        case 'S':
            simple_tests = 1;
            break;
        case 'e':
            numa_exit_on_error = 1;
            numa_exit_on_warn = 1;
            break;
        case 't':
            regression_testing = 1;
            break;
        default:
            usage();
            break;
        }
        ++av;
    }

    if (!av[1])
        usage();

    if (numa_available() < 0) {
        printf("your system does not support the numa API.\n");
        if (!force)
            exit(1);
    }

    nr_nodes = get_node_list();
    if(nr_nodes == -1){
        fprintf(stderr, "Configured Nodes does not match available memory nodes\n");
        exit(1);
    }
    if (nr_nodes < 2) {
        printf("A minimum of 2 nodes is required for this test.\n");
        exit(77);
    }

    printf("%d nodes available\n", numnodes);
    fract_nodes = (((numnodes-1)/8)*2) + FRACT_NODES;

    if (numnodes <= 3)
        regression_testing = 0; /* set -t auto-off for small systems */

    msize = memsize(av[1]);

    if (!msize)
        usage();

#ifdef HAVE_STREAM_LIB
    stream_setmem(msize);
#endif

    if (av[2] == NULL) {
        test(MEMSET);
        test(MEMCPY);
        if (simple_tests) {
            test(FORWARD);
            test(BACKWARD);
        }
#ifdef HAVE_MT
        test(RANDOM2);
#endif
#ifdef HAVE_STREAM_LIB
        test(STREAM);
#endif
        /* The pointer chase needs room for at least one list node. */
        if (msize >= sizeof(union node)) {
            test(PTRCHASE);
        } else {
            fprintf(stderr, "You must set msize at least %zu bytes for ptrchase test.\n",
                sizeof(union node));
            exit(1);
        }
    } else {
        int k;

        for (k = 2; k < ac; k++) {
            int i;
            int found = 0;

            /* The index of the matching name selects the test. */
            for (i = 0; testname[i]; i++) {
                if (!strcmp(testname[i],av[k])) {
                    test(i);
                    found = 1;
                    break;
                }
            }
            if (!found) {
                fprintf(stderr,"unknown test `%s'\n", av[k]);
                usage();
            }
        }
    }
    free(node_to_use);
    return 0;
}
07070100000025000081A400003EA60000003200000001612720430000067A000000000000000000000000000000000000002400000000numactl-2.0.14.20.g4ee5e0c/numaif.h#ifndef NUMAIF_H
#define NUMAIF_H 1
#ifdef __cplusplus
extern "C" {
#endif
/* Kernel interface for NUMA API */
/* System calls */
/*
 * Prototypes only; the implementations are not part of this header.
 * All five functions return long.  The nmask/frommask/tomask parameters
 * are pointers to unsigned long accompanied by a maxnode count -
 * presumably node bitmaps; confirm against the section 2 manual pages.
 */
extern long get_mempolicy(int *mode, unsigned long *nmask,
unsigned long maxnode, void *addr, unsigned flags);
extern long mbind(void *start, unsigned long len, int mode,
const unsigned long *nmask, unsigned long maxnode, unsigned flags);
extern long set_mempolicy(int mode, const unsigned long *nmask,
unsigned long maxnode);
extern long migrate_pages(int pid, unsigned long maxnode,
const unsigned long *frommask,
const unsigned long *tomask);
extern long move_pages(int pid, unsigned long count,
void **pages, const int *nodes, int *status, int flags);
/* Policies */
#define MPOL_DEFAULT 0
#define MPOL_PREFERRED 1
#define MPOL_BIND 2
#define MPOL_INTERLEAVE 3
#define MPOL_LOCAL 4
#define MPOL_MAX 5
/* Flags for set_mempolicy, specified in mode */
#define MPOL_F_NUMA_BALANCING (1 << 13) /* Optimize with NUMA balancing if possible */
/* Flags for get_mempolicy */
#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
/* Warning: MPOL_F_NODE is unsupported and
subject to change. Don't use. */
#define MPOL_F_ADDR (1<<1) /* look up vma using address */
#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
/* Flags for mbind */
#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
#ifdef __cplusplus
}
#endif
#endif
07070100000026000081A400003EA600000032000000016127204300000693000000000000000000000000000000000000002500000000numactl-2.0.14.20.g4ee5e0c/numaint.h/* Internal interfaces of libnuma */
/*
 * Affinity helpers in two API generations: the _v1 functions take a raw
 * unsigned long array plus a length, the _v2 functions take a
 * struct bitmask.  Each also has an _int twin.
 * NOTE(review): the _int names are presumably the hidden aliases created
 * with make_internal_alias() - confirm at the definitions.
 * NOTE(review): the getaffinity_v1 prototypes take a const mask although
 * a getter would normally write to it - confirm this is intended.
 */
extern int numa_sched_setaffinity_v1(pid_t pid, unsigned len, const unsigned long *mask);
extern int numa_sched_getaffinity_v1(pid_t pid, unsigned len, const unsigned long *mask);
extern int numa_sched_setaffinity_v1_int(pid_t pid, unsigned len,const unsigned long *mask);
extern int numa_sched_getaffinity_v1_int(pid_t pid, unsigned len,const unsigned long *mask);
extern int numa_sched_setaffinity_v2(pid_t pid, struct bitmask *mask);
extern int numa_sched_getaffinity_v2(pid_t pid, struct bitmask *mask);
extern int numa_sched_setaffinity_v2_int(pid_t pid, struct bitmask *mask);
extern int numa_sched_getaffinity_v2_int(pid_t pid, struct bitmask *mask);
#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */
/* Number of bits in an unsigned long. */
#define BITS_PER_LONG (sizeof(unsigned long) * 8)
/* Bytes needed to hold x bits, rounded up to a whole number of longs.
   Relies on a round_up() macro that is not defined in this header. */
#define CPU_BYTES(x) (round_up(x, BITS_PER_LONG)/8)
/* The same size expressed in longs. */
#define CPU_LONGS(x) (CPU_BYTES(x) / sizeof(long))
/* Declare x_int as an alias of x with hidden visibility. */
#define make_internal_alias(x) extern __typeof (x) x##_int __attribute((alias(#x), visibility("hidden")))
/* Attribute shorthand giving a symbol hidden visibility. */
#define hidden __attribute__((visibility("hidden")))
/*
 * Identifiers for the individual warning conditions.
 * NOTE(review): presumably passed to the library's warning routine so
 * each condition can be told apart - confirm at the call sites.  The
 * values are implicit, so inserting an entry renumbers those after it.
 */
enum numa_warn {
W_nosysfs,
W_noproc,
W_badmeminfo,
W_nosysfs2,
W_cpumap,
W_numcpus,
W_noderunmask,
W_distance,
W_memory,
W_cpuparse,
W_nodeparse,
W_blockdev1,
W_blockdev2,
W_blockdev3,
W_blockdev4,
W_blockdev5,
W_netlink1,
W_netlink2,
W_netlink3,
W_net1,
W_net2,
W_class1,
W_class2,
W_pci1,
W_pci2,
W_node_parse1,
W_node_parse2,
W_nonode,
W_badchar,
};
/* Ceiling division: number of y-sized units needed to hold x. */
#define howmany(x,y) (((x)+((y)-1))/(y))
/* Bit widths of unsigned long and unsigned int. */
#define bitsperlong (8 * sizeof(unsigned long))
#define bitsperint (8 * sizeof(unsigned int))
/* Number of longs needed to hold n bits. */
#define longsperbits(n) howmany(n, bitsperlong)
/* Number of bytes needed to hold x bits.  The argument is parenthesised
   so that expressions such as a|b or c?d:e expand correctly. */
#define bytesperbits(x) (((x)+7)/8)
07070100000027000081A400003EA600000032000000016127204300001B8C000000000000000000000000000000000000002500000000numactl-2.0.14.20.g4ee5e0c/numamon.c/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
numamon is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; version
2.
numamon is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should find a copy of v2 of the GNU General Public License somewhere
on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Display some numa statistics collected by the CPU.
Opteron specific. Also not reliable because the counters
are not quite correct in hardware. */
#define _LARGE_FILE_SOURCE 1
#define _GNU_SOURCE 1
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <dirent.h>
#include <getopt.h>
#include <stdarg.h>
#include <stdlib.h>
#include <sys/fcntl.h>
/* Indices into the mask tables below, one per traffic direction. */
enum { LOCALLOCAL = 0, LOCALREMOTE = 1, REMOTELOCAL = 2 };
/* Per-direction mask values for memory traffic and for I/O traffic;
   checkcounter() shifts the selected value left by 8 bits and ORs it
   into the event-select value. */
static int mem[] = { [LOCALLOCAL] = 0xa8, [LOCALREMOTE] = 0x98, [REMOTELOCAL] = 0x68 };
static int io[] = { [LOCALLOCAL] = 0xa4, [LOCALREMOTE] = 0x94, [REMOTELOCAL] = 0x64 };
/* Table currently in use: mem by default or with -m, io with -i. */
static int *masks = mem;
/* Report the failed call via perror() and exit with status 1. */
#define err(x) perror(x),exit(1)
/* MSR numbers passed to rdmsr()/wrmsr(): four event-select registers
   followed by the four matching counters. */
#define PERFEVTSEL0 0xc0010000
#define PERFEVTSEL1 0xc0010001
#define PERFEVTSEL2 0xc0010002
#define PERFEVTSEL3 0xc0010003
#define PERFCTR0 0xc0010004
#define PERFCTR1 0xc0010005
#define PERFCTR2 0xc0010006
#define PERFCTR3 0xc0010007
/* Event number and control bits that make up BASE, the part of the
   event-select value common to all counters programmed by this tool. */
#define EVENT 0xe9
#define PERFEVTSEL_EN (1 << 22)
#define PERFEVTSEL_OS (1 << 17)
#define PERFEVTSEL_USR (1 << 16)
#define BASE (EVENT | PERFEVTSEL_EN | PERFEVTSEL_OS | PERFEVTSEL_USR)
/* Size of msrfd[]; dumpall() scans CPU numbers 0..MAXCPU-1. */
#define MAXCPU 8
int force = 0; /* -f: reprogram counters that are already in use */
int msrfd[MAXCPU]; /* per-CPU descriptor for /dev/cpu/N/msr, negative if unused */
int delay; /* seconds between samples; 0 prints a single sample */
int absolute; /* -a: print raw counter values instead of deltas */
char *cfilter; /* -C argument: comma-separated CPU list, or NULL for all */
int verbose; /* -v: enables Vprintf() output */
void usage(void);
/* printf-style diagnostic written to stderr, but only in verbose mode. */
void Vprintf(char *fmt, ...)
{
    va_list args;

    if (!verbose)
        return;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}
/*
 * Read the 8-byte register msr through the descriptor stored in
 * msrfd[cpu], using the register number as the file offset.
 * Prints a message and exits with status 1 on a short or failed read.
 */
unsigned long long rdmsr(int cpu, unsigned long msr)
{
    unsigned long long value;
    ssize_t got = pread(msrfd[cpu], &value, 8, msr);

    if (got == 8)
        return value;

    fprintf(stderr, "rdmsr of %lx failed: %s\n", msr, strerror(errno));
    exit(1);
}
/*
 * Write the 8-byte value to register msr through the descriptor stored in
 * msrfd[cpu], using the register number as the file offset.
 * Prints a message and exits with status 1 on a short or failed write.
 * (The message previously misnamed the operation as "wdmsr".)
 */
void wrmsr(int cpu, unsigned long msr, unsigned long long value)
{
    if (pwrite(msrfd[cpu], &value, 8, msr) != 8) {
        fprintf(stderr, "wrmsr of %lx failed: %s\n", msr, strerror(errno));
        exit(1);
    }
}
/*
 * Return 1 if cpu should be monitored, 0 otherwise.
 * With no -C filter every CPU passes.  Otherwise cfilter is scanned as a
 * comma-separated list of numbers; a malformed list ends the program via
 * usage().
 */
int cpufilter(int cpu)
{
    char *pos, *stop;
    long wanted;

    if (cfilter == NULL)
        return 1;

    pos = cfilter;
    while (1) {
        wanted = strtoul(pos, &stop, 0);
        if (stop == pos)
            usage();        /* no digits where a number was expected */
        if (wanted == cpu)
            return 1;
        switch (*stop) {
        case ',':
            pos = stop + 1;
            break;
        case 0:
            return 0;       /* end of list, cpu not mentioned */
        default:
            usage();
            break;
        }
    }
}
/*
 * Make sure performance counters 1..3 of cpu are programmed for this tool.
 *
 * cpu   - index into msrfd[]
 * clear - non-zero to reset each counter even if it was already set up
 *
 * For each counter the event-select register must equal BASE combined
 * with the selected mask shifted left by 8 bits.  A disabled counter is
 * programmed; a counter in use by someone else is only overwritten with
 * -f, otherwise the program exits with status 1.  Newly programmed
 * counters are always cleared.
 *
 * Fixes: the "disabled counter" path wrote the mask without the 8-bit
 * shift, unlike the comparison and the -f path, so the value it programmed
 * could never match the check on a later run.  The long long conversions
 * now use the standard "ll" length modifier.
 */
void checkcounter(int cpu, int clear)
{
    int i;

    for (i = 1; i < 4; i++) {
        int clear_this = clear;
        unsigned long long wanted = BASE | (masks[i - 1] << 8);
        unsigned long long evtsel = rdmsr(cpu, PERFEVTSEL0 + i);

        Vprintf("%d: %x %llx\n", cpu, PERFEVTSEL0 + i, evtsel);
        if (!(evtsel & PERFEVTSEL_EN)) {
            Vprintf("reinit %d\n", cpu);
            wrmsr(cpu, PERFEVTSEL0 + i, wanted);
            clear_this = 1;
        } else if (evtsel == wanted) {
            /* everything fine */
        } else if (force) {
            Vprintf("reinit force %d\n", cpu);
            wrmsr(cpu, PERFEVTSEL0 + i, wanted);
            clear_this = 1;
        } else {
            fprintf(stderr, "perfctr %d cpu %d already used with %llx\n",
                i, cpu, evtsel);
            fprintf(stderr, "Consider using -f if you know what you're doing.\n");
            exit(1);
        }
        if (clear_this) {
            Vprintf("clearing %d\n", cpu);
            wrmsr(cpu, PERFCTR0 + i, 0);
        }
    }
}
/*
 * Open the MSR device of every CPU listed under /dev/cpu that passes the
 * -C filter, and check or program its counters.
 *
 * clear - passed to checkcounter(): non-zero resets the counters
 *
 * Exits with status 1 if /dev/cpu cannot be opened or no MSR device could
 * be opened.
 *
 * Fix: CPU numbers are now rejected unless they lie in 0..MAXCPU-1.  The
 * original test (cpunum > MAXCPU) accepted cpunum == MAXCPU and negative
 * values, both of which index outside msrfd[].
 */
void setup(int clear)
{
    DIR *dir;
    struct dirent *d;
    int numcpus = 0;

    /* All bytes 0xff leaves every descriptor slot at -1 (unused). */
    memset(msrfd, -1, sizeof(msrfd));
    dir = opendir("/dev/cpu");
    if (!dir)
        err("cannot open /dev/cpu");
    while ((d = readdir(dir)) != NULL) {
        char buf[64];
        char *end;
        long cpunum = strtoul(d->d_name, &end, 0);

        /* Skip entries that are not purely numeric. */
        if (end == d->d_name || *end != 0)
            continue;
        if (cpunum < 0 || cpunum >= MAXCPU) {
            fprintf(stderr, "too many cpus %ld %s\n", cpunum, d->d_name);
            continue;
        }
        if (!cpufilter(cpunum))
            continue;
        snprintf(buf, sizeof(buf), "/dev/cpu/%ld/msr", cpunum);
        msrfd[cpunum] = open64(buf, O_RDWR);
        if (msrfd[cpunum] < 0)
            continue;
        numcpus++;
        checkcounter(cpunum, clear);
    }
    closedir(dir);
    if (numcpus == 0) {
        fprintf(stderr, "No CPU found using MSR driver.\n");
        exit(1);
    }
}
/*
 * Format like printf and print the result left-justified in a field of
 * pad columns.  The text is formatted with a size limit of pad, so at most
 * pad-1 characters of it are kept.
 */
void printf_padded(int pad, char *fmt, ...)
{
    va_list args;
    char text[pad + 1];

    va_start(args, fmt);
    vsnprintf(text, pad, fmt, args);
    va_end(args);

    printf("%-*s", pad, text);
}
/* Print the column headings for the per-CPU counter table. */
void print_header(void)
{
    static const struct {
        int width;
        char *text;
    } columns[] = {
        { 4, "CPU " },
        { 16, "LOCAL" },
        { 16, "LOCAL->REMOTE" },
        { 16, "REMOTE->LOCAL" },
    };
    unsigned c;

    for (c = 0; c < sizeof(columns) / sizeof(columns[0]); c++)
        printf_padded(columns[c].width, columns[c].text);
    putchar('\n');
}
void print_cpu(int cpu)
{
int i;
static unsigned long long lastval[4];
printf_padded(4, "%d", cpu);
for (i = 1; i < 4; i++) {
unsigned long long val = rdmsr(cpu, PERFCTR0 + i);
if (absolute)
printf_padded(16, "%Lu", val);
else
printf_padded(16, "%Lu", val - lastval[i]);
lastval[i] = val;
}
putchar('\n');
}
/*
 * Print the counter table.  With no delay a single sample is printed;
 * otherwise a new sample follows every delay seconds, and the header is
 * repeated once more than 40 samples have been printed since the last one.
 */
void dumpall(void)
{
    int rounds = 0;
    int cpu;

    print_header();
    while (1) {
        for (cpu = 0; cpu < MAXCPU; ++cpu) {
            if (msrfd[cpu] >= 0)
                print_cpu(cpu);
        }
        if (delay == 0)
            return;
        sleep(delay);
        rounds++;
        if (rounds > 40) {
            rounds = 0;
            print_header();
        }
    }
}
/*
 * Refuse to run on unsupported processors: scan /proc/cpuinfo and exit
 * with status 1 unless every vendor_id line mentions "AMD" and every
 * "cpu family" line gives family 15.  If the file cannot be opened the
 * check is skipped.
 */
void checkk8(void)
{
    char *text = NULL;
    size_t cap = 0;
    int mismatches = 0;
    FILE *info = fopen("/proc/cpuinfo", "r");

    if (info == NULL)
        return;

    while (getline(&text, &cap, info) > 0) {
        if (strncmp("vendor_id", text, 9) == 0 && strstr(text, "AMD") == NULL)
            mismatches++;

        if (strncmp("cpu family", text, 10) == 0) {
            /* The value follows the colon; without one, parse the
               empty end of the line. */
            char *colon = strchr(text, ':');
            char *value = colon ? colon + 1 : text + strlen(text);
            int family = strtoul(value, NULL, 0);

            if (family != 15)
                mismatches++;
        }
    }

    if (mismatches) {
        printf("not an opteron cpu\n");
        exit(1);
    }
    free(text);
    fclose(info);
}
/* Write the option summary to stderr and exit with status 1. */
void usage(void)
{
    static const char *const help[] = {
        "usage: numamon [args] [delay]\n",
        " -f forcibly overwrite counters\n",
        " -i count IO (default memory)\n",
        " -a print absolute counter values (with delay)\n",
        " -s setup counters and exit\n",
        " -c clear counters and exit\n",
        " -m Print memory traffic (default)\n",
        " -C cpu{,cpu} only print for cpus\n",
        " -v Be verbose\n",
    };
    unsigned n;

    for (n = 0; n < sizeof(help) / sizeof(help[0]); n++)
        fputs(help[n], stderr);
    exit(1);
}
/*
 * numamon entry point: verify the CPU type, parse the options, then set
 * up the counters and print them.
 *
 * -c and -s act as soon as they are parsed (set up the counters and exit),
 * so only options that appear before them on the command line affect
 * them.  One optional non-option argument gives the sampling delay in
 * seconds; anything else is a usage error.
 */
int main(int ac, char **av)
{
    int opt;

    checkk8();

    for (;;) {
        opt = getopt(ac,av,"ifscmaC:v");
        if (opt == -1)
            break;

        if (opt == 'f') {
            force = 1;
        } else if (opt == 'c') {
            setup(1);
            exit(0);
        } else if (opt == 's') {
            setup(0);
            exit(0);
        } else if (opt == 'm') {
            masks = mem;
        } else if (opt == 'i') {
            masks = io;
        } else if (opt == 'a') {
            absolute = 1;
        } else if (opt == 'C') {
            cfilter = optarg;
        } else if (opt == 'v') {
            verbose = 1;
        } else {
            usage();
        }
    }

    if (av[optind] != NULL) {
        char *rest;

        delay = strtoul(av[optind], &rest, 10);
        if (*rest != 0)
            usage();        /* delay must be entirely numeric */
        if (av[optind+1] != NULL)
            usage();        /* at most one positional argument */
    }

    setup(0);
    dumpall();
    return 0;
}
07070100000028000081A400003EA60000003200000001612720430000194A000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/numastat.8.TH "numastat" "8" "1.0.0" "Bill Gray" "Administration"
.SH NAME
.LP
\fBnumastat\fP \- Show per-NUMA-node memory statistics for processes and the operating system
.SH "SYNTAX"
.LP
\fBnumastat\fP
.br
.LP
\fBnumastat\fP [\fI\-V\fP]
.br
.LP
\fBnumastat\fP [\fI<PID>|<pattern>...\fP]
.br
.LP
\fBnumastat\fP [\fI\-c\fP] [\fI\-m\fP] [\fI\-n\fP] [\fI\-p <PID>|<pattern>\fP] [\fI\-s[<node>]\fP] [\fI\-v\fP] [\fI\-z\fP] [\fI<PID>|<pattern>...\fP]
.br
.SH "DESCRIPTION"
.LP
.B numastat
with no command options or arguments at all, displays per-node NUMA hit and
miss system statistics from the kernel memory allocator. This default
\fBnumastat\fP behavior is strictly compatible with the previous long-standing
\fBnumastat\fP perl script, written by Andi Kleen. The default \fBnumastat\fP
statistics show per-node numbers (in units of pages of memory) in these categories:
.LP
.B numa_hit
is memory successfully allocated on this node as intended.
.LP
.B numa_miss
is memory allocated on this node despite the process preferring some different node. Each
.I numa_miss
has a
.I numa_foreign
on another node.
.LP
.B numa_foreign
is memory intended for this node, but actually allocated on some different node. Each
.I numa_foreign
has a
.I numa_miss
on another node.
.LP
.B interleave_hit
is interleaved memory successfully allocated on this node as intended.
.LP
.B local_node
is memory allocated on this node while a process was running on it.
.LP
.B other_node
is memory allocated on this node while a process was running on some other node.
.LP
Any supplied options or arguments with the \fBnumastat\fP command will
significantly change both the content and the format of the display. Specified
options will cause display units to change to megabytes of memory, and will
change other specific behaviors of \fBnumastat\fP as described below.
.LP
Memory usage information reflects the resident pages on the system.
.SH "OPTIONS"
.LP
.TP
\fB\-c\fR
Minimize table display width by dynamically shrinking column widths based on
data contents. With this option, amounts of memory will be rounded to the
nearest megabyte (rather than the usual display with two decimal places).
Column width and inter-column spacing will be somewhat unpredictable with this
option, but the more dense display will be very useful on systems with many
NUMA nodes.
.TP
\fB\-m\fR
Show the meminfo-like system-wide memory usage information. This option
produces a per-node breakdown of memory usage information similar to that found
in /proc/meminfo.
.TP
\fB\-n\fR
Show the original \fBnumastat\fP statistics info. This will show the same
information as the default \fBnumastat\fP behavior but the units will be megabytes of
memory, and there will be other formatting and layout changes versus the
original \fBnumastat\fP behavior.
.TP
\fB\-p\fR <\fBPID\fP> or <\fBpattern\fP>
Show per-node memory allocation information for the specified PID or pattern.
If the \-p argument is only digits, it is assumed to be a numerical PID. If
the argument characters are not only digits, it is assumed to be a text
fragment pattern to search for in process command lines. For example,
\fBnumastat -p qemu\fP will attempt to find and show information for processes
with "qemu" in the command line. Any command line arguments remaining after
\fBnumastat\fP option flag processing is completed, are assumed to be
additional <\fBPID\fP> or <\fBpattern\fP> process specifiers. In this sense,
the \fB\-p\fP option flag is optional: \fBnumastat qemu\fP is equivalent to
\fBnumastat -p qemu\fP
.TP
\fB\-s[<node>]\fR
Sort the table data in descending order before displaying it, so the biggest
memory consumers are listed first. With no specified <node>, the table will be
sorted by the total column. If the optional <node> argument is supplied, the
data will be sorted by the <node> column. Note that <node> must follow the
\fB\-s\fP immediately with no intermediate white space (e.g., \fBnumastat
\-s2\fP). Because \fB\-s\fP can allow an optional argument, it must always be
the last option character in a compound option character string. For example,
instead of \fBnumastat \-msc\fP (which probably will not work as you expect),
use \fBnumastat \-mcs\fP
.TP
\fB\-v\fR
Make some reports more verbose. In particular, process information for
multiple processes will display detailed information for each process.
Normally when per-node information for multiple processes is displayed, only
the total lines are shown.
.TP
\fB\-V\fR
Display \fBnumastat\fP version information and exit.
.TP
\fB\-z\fR
Skip display of table rows and columns of only zero values. This can be used
to greatly reduce the amount of uninteresting zero data on systems with many
NUMA nodes. Note that when rows or columns of zeros are still displayed with
this option, that probably means there is at least one value in the row or
column that is actually non-zero, but rounded to zero for display.
.SH NOTES
\fBnumastat\fP attempts to fold each table display so it will be conveniently
readable on the output terminal. Normally a terminal width of 80 characters is
assumed. When the \fBresize\fP command is available, \fBnumastat\fP attempts
to dynamically determine and fine tune the output tty width from \fBresize\fP
output. If \fBnumastat\fP output is not to a tty, very long output lines can
be produced, depending on how many NUMA nodes are present. In all cases,
output width can be explicitly specified via the \fBNUMASTAT_WIDTH\fP
environment variable. For example, \fBNUMASTAT_WIDTH=100 numastat\fP. On
systems with many NUMA nodes, \fBnumastat \-c \-z ....\fP can be very helpful
to selectively reduce the amount of displayed information.
.SH "ENVIRONMENT VARIABLES"
.LP
.TP
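NUMASTAT_WIDTH
Explicitly sets the output width, in characters, that \fBnumastat\fP uses when
folding table displays (for example, \fBNUMASTAT_WIDTH=100 numastat\fP).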
.SH "FILES"
.LP
\fI/proc/*/numa_maps\fP
.br
\fI/sys/devices/system/node/node*/meminfo\fP
.br
\fI/sys/devices/system/node/node*/numastat\fP
.SH "EXAMPLES"
.I numastat \-c \-z \-m \-n
.br
.I numastat \-czs libvirt kvm qemu
.br
.I watch \-n1 numastat
.br
.I watch \-n1 \-\-differences=cumulative numastat
.SH "AUTHORS"
.LP
The original \fBnumastat\fP perl script was written circa 2003 by Andi Kleen
<andi.kleen@intel.com>. The current \fBnumastat\fP program was written in 2012
by Bill Gray <bgray@redhat.com> to be compatible by default with the original,
and to add options to display per-node system memory usage and per-node process
memory allocation.
.SH "SEE ALSO"
.LP
.BR numactl (8),
.BR set_mempolicy (2),
.BR numa (3)
07070100000029000081A400003EA60000003200000001612720430000F150000000000000000000000000000000000000002600000000numactl-2.0.14.20.g4ee5e0c/numastat.c/*
numastat - NUMA monitoring tool to show per-node usage of memory
Copyright (C) 2012 Bill Gray (bgray@redhat.com), Red Hat Inc
numastat is free software; you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License as published by the Free
Software Foundation; version 2.1.
numastat is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should find a copy of v2.1 of the GNU Lesser General Public License
somewhere on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
Historical note: From approximately 2003 to 2012, numastat was a perl script
written by Andi Kleen to display the /sys/devices/system/node/node<N>/numastat
statistics. In 2012, numastat was rewritten as a C program by Red Hat to
display per-node memory data for applications and the system in general,
while also remaining strictly compatible by default with the original numastat.
A copy of the original numastat perl script is included for reference at the
end of this file.
*/
// Compile with: gcc -O -std=gnu99 -Wall -o numastat numastat.c
#define __USE_MISC
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <getopt.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#define STRINGIZE(s) #s
#define STRINGIFY(s) STRINGIZE(s)
#define KILOBYTE (1024)
#define MEGABYTE (1024 * 1024)
#define BUF_SIZE 2048
#define SMALL_BUF_SIZE 128
// Don't assume nodes are sequential or contiguous.
// Need to discover and map node numbers.
int *node_ix_map = NULL;
char **node_header;
// Structure to organize memory info from /proc/<PID>/numa_maps for a specific
// process, or from /sys/devices/system/node/node?/meminfo for system-wide
// data. Tables are defined below for each process and for system-wide data.
// One memory category: index is its position in the table, token is the
// text identifying the category in the data source, and label is a second,
// capitalised form of the name.
// NOTE(review): label is presumably the row heading shown in the output
// table - confirm where the tables are displayed.
typedef struct meminfo {
int index;
char *token;
char *label;
} meminfo_t, *meminfo_p;
// Positions of the "huge" and "N" (Private) entries in process_meminfo.
#define PROCESS_HUGE_INDEX 0
#define PROCESS_PRIVATE_INDEX 3
// Categories reported per process.
meminfo_t process_meminfo[] = {
{ PROCESS_HUGE_INDEX, "huge", "Huge" },
{ 1, "heap", "Heap" },
{ 2, "stack", "Stack" },
{ PROCESS_PRIVATE_INDEX, "N", "Private" }
};
#define PROCESS_MEMINFO_ROWS (sizeof(process_meminfo) / sizeof(process_meminfo[0]))
// Categories of the classic numastat report; the tokens are the counter
// names listed in the numastat(8) manual page.
meminfo_t numastat_meminfo[] = {
{ 0, "numa_hit", "Numa_Hit" },
{ 1, "numa_miss", "Numa_Miss" },
{ 2, "numa_foreign", "Numa_Foreign" },
{ 3, "interleave_hit", "Interleave_Hit" },
{ 4, "local_node", "Local_Node" },
{ 5, "other_node", "Other_Node" },
};
#define NUMASTAT_MEMINFO_ROWS (sizeof(numastat_meminfo) / sizeof(numastat_meminfo[0]))
// Categories of the system-wide (meminfo-style) report.  Each entry's
// index equals its position in the array, and token and label are
// identical for every entry.
meminfo_t system_meminfo[] = {
{ 0, "MemTotal", "MemTotal" },
{ 1, "MemFree", "MemFree" },
{ 2, "MemUsed", "MemUsed" },
{ 3, "HighTotal", "HighTotal" },
{ 4, "HighFree", "HighFree" },
{ 5, "LowTotal", "LowTotal" },
{ 6, "LowFree", "LowFree" },
{ 7, "Active", "Active" },
{ 8, "Inactive", "Inactive" },
{ 9, "Active(anon)", "Active(anon)" },
{ 10, "Inactive(anon)", "Inactive(anon)" },
{ 11, "Active(file)", "Active(file)" },
{ 12, "Inactive(file)", "Inactive(file)" },
{ 13, "Unevictable", "Unevictable" },
{ 14, "Mlocked", "Mlocked" },
{ 15, "Dirty", "Dirty" },
{ 16, "Writeback", "Writeback" },
{ 17, "FilePages", "FilePages" },
{ 18, "Mapped", "Mapped" },
{ 19, "AnonPages", "AnonPages" },
{ 20, "Shmem", "Shmem" },
{ 21, "KernelStack", "KernelStack" },
{ 22, "PageTables", "PageTables" },
{ 23, "NFS_Unstable", "NFS_Unstable" },
{ 24, "Bounce", "Bounce" },
{ 25, "WritebackTmp", "WritebackTmp" },
{ 26, "Slab", "Slab" },
{ 27, "SReclaimable", "SReclaimable" },
{ 28, "SUnreclaim", "SUnreclaim" },
{ 29, "AnonHugePages", "AnonHugePages" },
{ 30, "ShmemHugePages", "ShmemHugePages" },
{ 31, "ShmemPmdMapped", "ShmemPmdMapped" },
{ 32, "HugePages_Total", "HugePages_Total" },
{ 33, "HugePages_Free", "HugePages_Free" },
{ 34, "HugePages_Surp", "HugePages_Surp" },
{ 35, "KReclaimable", "KReclaimable" }
};
#define SYSTEM_MEMINFO_ROWS (sizeof(system_meminfo) / sizeof(system_meminfo[0]))
// To allow re-ordering the meminfo memory categories in system_meminfo and
// numastat_meminfo relative to their order in /proc, etc., a simple hash index
// is used to look up the meminfo categories. The allocated hash table size
// must be bigger than necessary to reduce collisions (and because these
// specific hash algorithms depend on having some unused buckets).
// Number of buckets in the open-addressed hash table
#define HASH_TABLE_SIZE 151
// Running count of occupied buckets stepped over by hash_insert()
int hash_collisions = 0;
// One bucket: a NULL name marks the bucket as unused; index is the value
// stored with the name by hash_insert() and returned by hash_lookup().
struct hash_entry {
char *name;
int index;
} hash_table[HASH_TABLE_SIZE];
void init_hash_table(void)
{
memset(hash_table, 0, sizeof(hash_table));
}
// Compute the home bucket for string s: start from 17, fold in every
// character as hash = hash * 33 + c, then reduce modulo HASH_TABLE_SIZE.
int hash_ix(char *s)
{
	unsigned int hash = 17;
	for (char *cp = s; *cp != '\0'; cp++) {
		hash = (hash * 33) + *cp;
	}
	return (hash % HASH_TABLE_SIZE);
}
// Return the index stored for name s, or -1 if s is not in the table.
// Probes linearly from the home bucket, wrapping at the end of the table;
// an unused bucket ends the search, so the table must never be full.
int hash_lookup(char *s)
{
	int slot = hash_ix(s);
	while (hash_table[slot].name != NULL) {
		if (strcmp(s, hash_table[slot].name) == 0) {
			return hash_table[slot].index;
		}
		slot = (slot + 1 < HASH_TABLE_SIZE) ? (slot + 1) : 0;
	}
	return -1;
}
int hash_insert(char *s, int i)
{
int ix = hash_ix(s);
while (hash_table[ix].name) { // assumes no duplicate entries
hash_collisions += 1;
ix += 1;
if (ix >= HASH_TABLE_SIZE) {
ix = 0;
}
}
hash_table[ix].name = s;
hash_table[ix].index = i;
return ix;
}
// To decouple details of table display (e.g. column width, line folding for
// display screen width, et cetera) from acquiring the data and populating the
// tables, this semi-general table handling code is used. There are various
// routines to set table attributes, assign and test some cell contents,
// initialize and actually display the table.
// Cell content types: selects which member of the cell union is meaningful
#define CELL_TYPE_NULL 0
#define CELL_TYPE_LONG 1
#define CELL_TYPE_DOUBLE 2
#define CELL_TYPE_STRING 3
#define CELL_TYPE_CHAR8 4
#define CELL_TYPE_REPCHAR 5
// Per-cell flag bits. CELL_FLAG_FREEABLE marks a string cell whose pointer
// is released by free_cell().
#define CELL_FLAG_FREEABLE (1 << 0)
#define CELL_FLAG_ROWSPAN (1 << 1)
#define CELL_FLAG_COLSPAN (1 << 2)
// Column justification occupies the low two bits of the column flags;
// CENTER is the combination of the LEFT and RIGHT bits.
#define COL_JUSTIFY_LEFT (1 << 0)
#define COL_JUSTIFY_RIGHT (1 << 1)
#define COL_JUSTIFY_CENTER 3
#define COL_JUSTIFY_MASK 0x3
// Column state bits, set by display_table() (SEEN / NON_ZERO) or by callers
// (ALWAYS_SHOW)
#define COL_FLAG_SEEN_DATA (1 << 2)
#define COL_FLAG_NON_ZERO_DATA (1 << 3)
#define COL_FLAG_ALWAYS_SHOW (1 << 4)
// Row flags deliberately share the bit values of the column flags
#define ROW_FLAG_SEEN_DATA COL_FLAG_SEEN_DATA
#define ROW_FLAG_NON_ZERO_DATA COL_FLAG_NON_ZERO_DATA
#define ROW_FLAG_ALWAYS_SHOW COL_FLAG_ALWAYS_SHOW
// One table cell: a type tag (CELL_TYPE_*), flag bits (CELL_FLAG_*), and an
// anonymous union holding the value in the representation named by type.
typedef struct cell {
uint32_t type;
uint32_t flags;
union {
// CELL_TYPE_STRING
char *s;
// CELL_TYPE_DOUBLE
double d;
// CELL_TYPE_LONG
int64_t l;
// CELL_TYPE_CHAR8 (up to 8 characters) and CELL_TYPE_REPCHAR (c[0] only)
char c[8];
};
} cell_t, *cell_p;
// A display table: header rows/columns followed by data rows/columns, with
// all cells stored row-major in one array, plus per-row and per-column
// attribute vectors allocated by init_table().
typedef struct vtab {
int header_rows;
int header_cols;
int data_rows;
int data_cols;
// (header_rows + data_rows) * (header_cols + data_cols) cells, row-major
cell_p cell;
// Display order: row_ix_map[n] is the actual row shown at position n
int *row_ix_map;
// ROW_FLAG_* bits, one byte per row
uint8_t *row_flags;
// COL_JUSTIFY_* and COL_FLAG_* bits, one byte per column
uint8_t *col_flags;
// Display width of each column, in characters
uint8_t *col_width;
// Digits after the decimal point used when formatting double cells
uint8_t *col_decimal_places;
} vtab_t, *vtab_p;
// Convenience macros; all of them expect a vtab_p variable named "table"
// to be in scope at the point of use.
// Total number of rows / columns, header plus data
#define ALL_TABLE_ROWS (table->header_rows + table->data_rows)
#define ALL_TABLE_COLS (table->header_cols + table->data_cols)
// Address of the cell at (row, col) in the row-major cell array. The
// arguments are parenthesized so that expressions such as "base + n" are
// evaluated as a unit.
#define GET_CELL_PTR(row, col) (&table->cell[((row) * ALL_TABLE_COLS) + (col)])
// Blank space left between adjacent columns
#define USUAL_GUTTER_WIDTH 1
// Turn on the given ROW_FLAG_* bit(s) for one row.
void set_row_flag(vtab_p table, int row, int flag)
{
	uint8_t *flags = &table->row_flags[row];
	*flags |= (uint8_t)flag;
}
// Turn on the given COL_FLAG_* bit(s) for one column.
void set_col_flag(vtab_p table, int col, int flag)
{
	uint8_t *flags = &table->col_flags[col];
	*flags |= (uint8_t)flag;
}
// Turn off the given ROW_FLAG_* bit(s) for one row.
void clear_row_flag(vtab_p table, int row, int flag)
{
	uint8_t keep_mask = (uint8_t)~flag;
	table->row_flags[row] &= keep_mask;
}
// Turn off the given COL_FLAG_* bit(s) for one column.
void clear_col_flag(vtab_p table, int col, int flag)
{
	uint8_t keep_mask = (uint8_t)~flag;
	table->col_flags[col] &= keep_mask;
}
// Return 1 if any of the given flag bit(s) is set for the row, else 0.
int test_row_flag(vtab_p table, int row, int flag)
{
	uint8_t wanted = (uint8_t)flag;
	return (table->row_flags[row] & wanted) ? 1 : 0;
}
// Return 1 if any of the given flag bit(s) is set for the column, else 0.
int test_col_flag(vtab_p table, int col, int flag)
{
	uint8_t wanted = (uint8_t)flag;
	return (table->col_flags[col] & wanted) ? 1 : 0;
}
// Replace the justification bits of a column with justify (COL_JUSTIFY_*),
// leaving the other column flag bits untouched.
void set_col_justification(vtab_p table, int col, int justify)
{
	uint8_t other_bits = table->col_flags[col] & (uint8_t)~COL_JUSTIFY_MASK;
	uint8_t justify_bits = (uint8_t)(justify & COL_JUSTIFY_MASK);
	table->col_flags[col] = other_bits | justify_bits;
}
// Set the display width of a column. Widths of SMALL_BUF_SIZE or more are
// reduced to SMALL_BUF_SIZE - 1.
void set_col_width(vtab_p table, int col, uint8_t width)
{
	uint8_t limited = width;
	if (limited >= SMALL_BUF_SIZE) {
		limited = SMALL_BUF_SIZE - 1;
	}
	table->col_width[col] = limited;
}
// Set how many digits follow the decimal point when double cells in this
// column are formatted.
void set_col_decimal_places(vtab_p table, int col, uint8_t places)
{
	uint8_t *slot = &table->col_decimal_places[col];
	*slot = places;
}
// Turn on the given CELL_FLAG_* bit(s) for the cell at (row, col).
void set_cell_flag(vtab_p table, int row, int col, int flag)
{
	cell_p target = GET_CELL_PTR(row, col);
	target->flags = target->flags | (uint32_t)flag;
}
// Turn off the given CELL_FLAG_* bit(s) for the cell at (row, col).
void clear_cell_flag(vtab_p table, int row, int col, int flag)
{
	cell_p target = GET_CELL_PTR(row, col);
	uint32_t keep_mask = (uint32_t)~flag;
	target->flags = target->flags & keep_mask;
}
// Return 1 if any of the given flag bit(s) is set on the cell, else 0.
int test_cell_flag(vtab_p table, int row, int col, int flag)
{
	cell_p target = GET_CELL_PTR(row, col);
	uint32_t wanted = (uint32_t)flag;
	return (target->flags & wanted) ? 1 : 0;
}
// Make the cell at (row, col) a string cell referring to s. The pointer is
// stored as-is; the string is not copied.
void string_assign(vtab_p table, int row, int col, char *s)
{
	cell_p target = GET_CELL_PTR(row, col);
	target->s = s;
	target->type = CELL_TYPE_STRING;
}
// Make the cell at (row, col) a "repeated character" cell, which is
// displayed as c repeated across the column width.
void repchar_assign(vtab_p table, int row, int col, char c)
{
	cell_p target = GET_CELL_PTR(row, col);
	target->c[0] = c;
	target->type = CELL_TYPE_REPCHAR;
}
// Make the cell at (row, col) a double cell holding d.
void double_assign(vtab_p table, int row, int col, double d)
{
	cell_p target = GET_CELL_PTR(row, col);
	target->d = d;
	target->type = CELL_TYPE_DOUBLE;
}
// Make the cell at (row, col) an integer cell holding l.
void long_assign(vtab_p table, int row, int col, int64_t l)
{
	cell_p target = GET_CELL_PTR(row, col);
	target->l = l;
	target->type = CELL_TYPE_LONG;
}
// Add d to the double value of the cell at (row, col) and mark the cell as
// a double cell. The existing union contents are read as a double
// regardless of the cell's previous type.
void double_addto(vtab_p table, int row, int col, double d)
{
	cell_p target = GET_CELL_PTR(row, col);
	target->d = target->d + d;
	target->type = CELL_TYPE_DOUBLE;
}
// Add l to the integer value of the cell at (row, col) and mark the cell as
// an integer cell. The existing union contents are read as an integer
// regardless of the cell's previous type.
void long_addto(vtab_p table, int row, int col, int64_t l)
{
	cell_p target = GET_CELL_PTR(row, col);
	target->l = target->l + l;
	target->type = CELL_TYPE_LONG;
}
// Reset the cell at (row, col) to all zero bytes (type CELL_TYPE_NULL, no
// flags, zero value). Does not free a string the cell may have owned.
void clear_assign(vtab_p table, int row, int col)
{
	cell_p target = GET_CELL_PTR(row, col);
	memset(target, 0, sizeof(*target));
}
// Reset every cell in the data area (everything outside the header rows
// and header columns) to a zero value of the given CELL_TYPE_*.
void zero_table_data(vtab_p table, int type)
{
	int r = table->header_rows;
	while (r < ALL_TABLE_ROWS) {
		int c = table->header_cols;
		while (c < ALL_TABLE_COLS) {
			cell_p target = GET_CELL_PTR(r, c);
			memset(target, 0, sizeof(*target));
			target->type = type;
			c++;
		}
		r++;
	}
}
// Selection-sort the display order of rows start_row..stop_row (inclusive)
// so that the double values in column col are descending. Only
// row_ix_map[] is permuted; the cells themselves do not move.
void sort_rows_descending_by_col(vtab_p table, int start_row, int stop_row, int col)
{
	int *order = table->row_ix_map;
	for (int pos = start_row; pos <= stop_row; pos++) {
		// Find the largest remaining value at or after pos
		int best_pos = pos;
		cell_p best_cell = GET_CELL_PTR(order[pos], col);
		for (int scan = pos + 1; scan <= stop_row; scan++) {
			cell_p scan_cell = GET_CELL_PTR(order[scan], col);
			if (scan_cell->d > best_cell->d) {
				best_pos = scan;
				best_cell = scan_cell;
			}
		}
		// Move it into place
		if (best_pos != pos) {
			int displaced = order[pos];
			order[pos] = order[best_pos];
			order[best_pos] = displaced;
		}
	}
}
// Placeholder for row / column spanning; currently does nothing.
void span(vtab_p table, int first_row, int first_col, int last_row, int last_col)
{
	// FIXME: implement row / col spanning someday?
	(void)table;
	(void)first_row;
	(void)first_col;
	(void)last_row;
	(void)last_col;
}
// Initialize a table with the given numbers of header and data rows and
// columns, and allocate its storage: the zeroed cell array, the row display
// map (initially the identity), and the zeroed row flag, column flag,
// column width and column decimal-place vectors. Exits the program if any
// allocation fails. Release the storage with free_table().
void init_table(vtab_p table, int header_rows, int header_cols, int data_rows, int data_cols)
{
	// init table sizes
	table->header_rows = header_rows;
	table->header_cols = header_cols;
	table->data_rows = data_rows;
	table->data_cols = data_cols;
	// Element counts are kept in size_t and handed to calloc() so the byte
	// sizes are never computed in (or truncated to) int
	size_t all_rows = (size_t)ALL_TABLE_ROWS;
	size_t all_cols = (size_t)ALL_TABLE_COLS;
	// allocate zeroed memory for all the cells
	table->cell = calloc(all_rows * all_cols, sizeof(cell_t));
	if (table->cell == NULL) {
		perror("malloc failed line: " STRINGIFY(__LINE__));
		exit(EXIT_FAILURE);
	}
	// allocate memory for the row map vector
	table->row_ix_map = calloc(all_rows, sizeof(int));
	if (table->row_ix_map == NULL) {
		perror("malloc failed line: " STRINGIFY(__LINE__));
		exit(EXIT_FAILURE);
	}
	for (int row = 0; (row < ALL_TABLE_ROWS); row++) {
		table->row_ix_map[row] = row;
	}
	// allocate zeroed memory for the row flags vector
	table->row_flags = calloc(all_rows, sizeof(uint8_t));
	if (table->row_flags == NULL) {
		perror("malloc failed line: " STRINGIFY(__LINE__));
		exit(EXIT_FAILURE);
	}
	// allocate zeroed memory for the column flags vector
	table->col_flags = calloc(all_cols, sizeof(uint8_t));
	if (table->col_flags == NULL) {
		perror("malloc failed line: " STRINGIFY(__LINE__));
		exit(EXIT_FAILURE);
	}
	// allocate zeroed memory for the column width vector
	table->col_width = calloc(all_cols, sizeof(uint8_t));
	if (table->col_width == NULL) {
		perror("malloc failed line: " STRINGIFY(__LINE__));
		exit(EXIT_FAILURE);
	}
	// allocate zeroed memory for the column precision vector
	table->col_decimal_places = calloc(all_cols, sizeof(uint8_t));
	if (table->col_decimal_places == NULL) {
		perror("malloc failed line: " STRINGIFY(__LINE__));
		exit(EXIT_FAILURE);
	}
}
// Release the string owned by the cell at (row, col), if it is a string
// cell flagged CELL_FLAG_FREEABLE, then reset the cell to all zero bytes.
void free_cell(vtab_p table, int row, int col)
{
	cell_p target = GET_CELL_PTR(row, col);
	int owns_string = (target->type == CELL_TYPE_STRING)
		&& ((target->flags & CELL_FLAG_FREEABLE) != 0);
	if (owns_string) {
		// free(NULL) is a no-op, so a NULL string needs no special case
		free(target->s);
	}
	memset(target, 0, sizeof(*target));
}
// Release everything init_table() allocated for the table, including any
// freeable strings held by its cells. The pointers inside the table are
// left as they were (not reset to NULL).
void free_table(vtab_p table)
{
	if (table->cell != NULL) {
		int total_rows = ALL_TABLE_ROWS;
		int total_cols = ALL_TABLE_COLS;
		for (int r = 0; r < total_rows; r++) {
			for (int c = 0; c < total_cols; c++) {
				free_cell(table, r, c);
			}
		}
	}
	// free(NULL) is a no-op, so the vectors need no individual NULL checks
	free(table->cell);
	free(table->row_ix_map);
	free(table->row_flags);
	free(table->col_flags);
	free(table->col_width);
	free(table->col_decimal_places);
}
// Format the contents of a cell as text, truncated to at most max_width
// characters. decimal_places is used only for double cells. A repchar cell
// is rendered as its character repeated (max_width - USUAL_GUTTER_WIDTH)
// times.
// Returns pointer to a static buffer, expecting caller to
// immediately use or copy the contents before calling again.
char *fmt_cell_data(cell_p c_ptr, int max_width, int decimal_places)
{
	static char buf[SMALL_BUF_SIZE];
	// Clamp the widths so that neither the repchar fill nor the final
	// truncation can write outside buf
	if (max_width < 0) {
		max_width = 0;
	}
	if (max_width > SMALL_BUF_SIZE - 1) {
		max_width = SMALL_BUF_SIZE - 1;
	}
	int rep_width = max_width - USUAL_GUTTER_WIDTH;
	if (rep_width < 0) {
		rep_width = 0;
	}
	switch (c_ptr->type) {
	case CELL_TYPE_NULL:
		buf[0] = '\0';
		break;
	case CELL_TYPE_LONG:
		// int64_t is not "long" on every platform, so print via long long
		snprintf(buf, SMALL_BUF_SIZE, "%lld", (long long)c_ptr->l);
		break;
	case CELL_TYPE_DOUBLE:
		snprintf(buf, SMALL_BUF_SIZE, "%.*f", decimal_places, c_ptr->d);
		break;
	case CELL_TYPE_STRING:
		snprintf(buf, SMALL_BUF_SIZE, "%s", c_ptr->s);
		break;
	case CELL_TYPE_CHAR8:
		// c[] may use all 8 bytes without a terminator
		strncpy(buf, c_ptr->c, 8);
		buf[8] = '\0';
		break;
	case CELL_TYPE_REPCHAR:
		memset(buf, c_ptr->c[0], rep_width);
		buf[rep_width] = '\0';
		break;
	default:
		strcpy(buf, "Unknown");
		break;
	}
	// Truncate to the column width
	buf[max_width] = '\0';
	return buf;
}
// Size a column to fit its widest formatted cell (repchar cells are
// ignored), plus the gutter. The text width is never less than min_width
// and the final column width never exceeds max_width.
void auto_set_col_width(vtab_p table, int col, int min_width, int max_width)
{
	int widest = min_width;
	int total_rows = ALL_TABLE_ROWS;
	for (int r = 0; r < total_rows; r++) {
		cell_p cell = GET_CELL_PTR(r, col);
		if (cell->type != CELL_TYPE_REPCHAR) {
			int places = (int)(table->col_decimal_places[col]);
			int len = strlen(fmt_cell_data(cell, max_width, places));
			if (len > widest) {
				widest = len;
			}
		}
	}
	widest += USUAL_GUTTER_WIDTH;
	table->col_width[col] = (uint8_t)((widest > max_width) ? max_width : widest);
}
// Print one cell to stdout, padded with spaces to exactly width characters
// and justified according to the COL_JUSTIFY_* bits in col_flags (anything
// other than left or right is centered). row_flags is currently unused.
void display_justified_cell(cell_p c_ptr, int row_flags, int col_flags, int width, int decimal_places)
{
	char out[SMALL_BUF_SIZE];
	char *text = fmt_cell_data(c_ptr, width, decimal_places);
	int len = strlen(text);
	int pad = width - len;
	int justify = col_flags & COL_JUSTIFY_MASK;
	if (justify == COL_JUSTIFY_LEFT) {
		// Text first, blanks after
		memcpy(out, text, len);
		if (pad > 0) {
			memset(out + len, ' ', pad);
		}
	} else if (justify == COL_JUSTIFY_RIGHT) {
		// Blanks first, text after
		if (pad > 0) {
			memset(out, ' ', pad);
		}
		memcpy(out + pad, text, len);
	} else {
		// Centered: blank the whole field, then drop the text in the middle
		memset(out, ' ', width);
		memcpy(out + ((pad + 1) / 2), text, len);
	}
	out[width] = '\0';
	fputs(out, stdout);
}
// Print the table to stdout. When the data columns do not all fit within
// screen_width, the table is folded into several sections, each of which
// repeats the header columns and is separated from the previous section by
// a blank line.
//   show_unseen_rows / show_unseen_cols: if zero, data rows / columns in
//     which no data cell was ever assigned are skipped.
//   show_zero_rows / show_zero_cols: if zero, data rows / columns whose
//     data cells are all zero are skipped.
// Header rows are always printed, and rows or columns flagged ALWAYS_SHOW
// are never skipped. As a side effect, the SEEN_DATA and NON_ZERO_DATA
// flags of the rows and columns are updated.
void display_table(vtab_p table,
int screen_width,
int show_unseen_rows,
int show_unseen_cols,
int show_zero_rows,
int show_zero_cols)
{
// Set row and column flags according to whether data in rows and cols
// has been assigned, and is currently non-zero.
int some_seen_data = 0;
int some_non_zero_data = 0;
for (int row = table->header_rows; (row < ALL_TABLE_ROWS); row++) {
for (int col = table->header_cols; (col < ALL_TABLE_COLS); col++) {
cell_p c_ptr = GET_CELL_PTR(row, col);
// Currently, "seen data" includes not only numeric data, but also
// any strings, etc -- anything non-NULL (other than repchars).
if ((c_ptr->type != CELL_TYPE_NULL) && (c_ptr->type != CELL_TYPE_REPCHAR)) {
some_seen_data = 1;
set_row_flag(table, row, ROW_FLAG_SEEN_DATA);
set_col_flag(table, col, COL_FLAG_SEEN_DATA);
// Currently, "non-zero data" includes not only numeric data,
// but also any strings, etc -- anything non-zero (other than
// repchars, which are already excluded above). So, note a
// valid non-NULL pointer to an empty string would still be
// counted as non-zero data.
if (c_ptr->l != (int64_t)0) {
some_non_zero_data = 1;
set_row_flag(table, row, ROW_FLAG_NON_ZERO_DATA);
set_col_flag(table, col, COL_FLAG_NON_ZERO_DATA);
}
}
}
}
if (!some_seen_data) {
printf("Table has no data.\n");
return;
}
if (!some_non_zero_data && !show_zero_rows && !show_zero_cols) {
printf("Table has no non-zero data.\n");
return;
}
// Start with first data column and try to display table,
// folding lines as necessary per screen_width
// col starts at -1 so no blank separator line precedes the first section
int col = -1;
int data_col = table->header_cols;
while (data_col < ALL_TABLE_COLS) {
// Skip data columns until we have one to display
if ((!test_col_flag(table, data_col, COL_FLAG_ALWAYS_SHOW)) &&
(((!show_unseen_cols) && (!test_col_flag(table, data_col, COL_FLAG_SEEN_DATA))) ||
((!show_zero_cols) && (!test_col_flag(table, data_col, COL_FLAG_NON_ZERO_DATA))))) {
data_col += 1;
continue;
}
// Display blank line between table sections
if (col > 0) {
printf("\n");
}
// For each row, display as many columns as possible
for (int row_ix = 0; (row_ix < ALL_TABLE_ROWS); row_ix++) {
// Rows are visited in display order, which may have been sorted
int row = table->row_ix_map[row_ix];
// If past the header rows, conditionally skip rows
if ((row >= table->header_rows) && (!test_row_flag(table, row, ROW_FLAG_ALWAYS_SHOW))) {
// Optionally skip row if no data seen or if all zeros
if (((!show_unseen_rows) && (!test_row_flag(table, row, ROW_FLAG_SEEN_DATA))) ||
((!show_zero_rows) && (!test_row_flag(table, row, ROW_FLAG_NON_ZERO_DATA)))) {
continue;
}
}
// Begin a new row...
int cur_line_width = 0;
// All lines start with the left header columns
for (col = 0; (col < table->header_cols); col++) {
display_justified_cell(GET_CELL_PTR(row, col),
(int)(table->row_flags[row]),
(int)(table->col_flags[col]),
(int)(table->col_width[col]),
(int)(table->col_decimal_places[col]));
cur_line_width += (int)(table->col_width[col]);
}
// Reset column index to starting data column for each new row
col = data_col;
// Try to display as many data columns as possible in every section
for (;;) {
// See if we should print this column
if (test_col_flag(table, col, COL_FLAG_ALWAYS_SHOW) ||
(((show_unseen_cols) || (test_col_flag(table, col, COL_FLAG_SEEN_DATA))) &&
((show_zero_cols) || (test_col_flag(table, col, COL_FLAG_NON_ZERO_DATA))))) {
display_justified_cell(GET_CELL_PTR(row, col),
(int)(table->row_flags[row]),
(int)(table->col_flags[col]),
(int)(table->col_width[col]),
(int)(table->col_decimal_places[col]));
cur_line_width += (int)(table->col_width[col]);
}
col += 1;
// End the line if no more columns or next column would exceed screen width
if ((col >= ALL_TABLE_COLS) ||
((cur_line_width + (int)(table->col_width[col])) > screen_width)) {
break;
}
}
printf("\n");
}
// Remember next starting data column for next section
// (every printed row stops at the same column, because the widths are
// per-column)
data_col = col;
}
}
// Program-wide settings and state.
// NOTE(review): most of these are presumably set from the command line in
// main(); only their uses are described here.
// Non-zero makes show_process_info() show per-category tables for each PID
int verbose = 0;
// Number of valid entries in pid_array
int num_pids = 0;
// Number of NUMA nodes found by init_node_ix_map_and_header()
int num_nodes = 0;
// Width passed to display_table() for line folding
int screen_width = 0;
// Passed to display_table() as both show_zero_rows and show_zero_cols
int show_zero_data = 1;
// Non-zero shrinks column widths to fit and drops decimal places
int compress_display = 0;
// Non-zero sorts table rows by the column selected via sort_table_node
int sort_table = 0;
// Node whose column to sort by; out-of-range values select the total column
int sort_table_node = -1;
// Non-zero selects the compatibility output format (one header row, raw
// tokens as labels, values not converted to MBs)
int compatibility_mode = 0;
// Allocated capacity of pid_array, in entries
int pid_array_max_pids = 0;
// Growable list of PIDs to report on; see add_pid_to_list()
int *pid_array = NULL;
// Program name shown in usage and version messages
char *prog_name = NULL;
// Multipliers used to convert page counts to bytes
double page_size_in_bytes = 0;
double huge_page_size_in_bytes = 0;
// Print the program name, code version and build date to stdout, then exit
// successfully.
void display_version_and_exit(void)
{
	static const char version_string[] = "20130723";
	printf("%s version: %s: %s\n", prog_name, version_string, __DATE__);
	exit(EXIT_SUCCESS);
}
// Print the usage summary to stderr, then exit with a failure status.
void display_usage_and_exit(void)
{
	fprintf(stderr, "Usage: %s [-c] [-m] [-n] [-p <PID>|<pattern>] [-s[<node>]] [-v] [-V] [-z] [ <PID>|<pattern>... ]\n", prog_name);
	// Option lines that need no formatting
	fputs("-c to minimize column widths\n"
	      "-m to show meminfo-like system-wide memory usage\n"
	      "-n to show the numastat statistics info\n"
	      "-p <PID>|<pattern> to show process info\n"
	      "-s[<node>] to sort data by total column or <node>\n"
	      "-v to make some reports more verbose\n", stderr);
	fprintf(stderr, "-V to show the %s code version\n", prog_name);
	fputs("-z to skip rows and columns of zeros\n", stderr);
	exit(EXIT_FAILURE);
}
// Determine the display width, in columns, used to fold tables:
//   - the NUMASTAT_WIDTH environment variable, if set;
//   - otherwise, if stdout is a terminal, the COLUMNS value reported by the
//     "resize" command (80 if that is unavailable);
//   - otherwise (output is not a terminal) a very large width, so lines
//     are effectively never folded.
// Values outside 1..10000000 fall back to 80, and the result is never less
// than 32.
int get_screen_width(void)
{
	int width = 80;
	char *p = getenv("NUMASTAT_WIDTH");
	if (p != NULL) {
		width = atoi(p);
		if ((width < 1) || (width > 10000000)) {
			width = 80;
		}
	} else if (isatty(fileno(stdout))) {
		FILE *fs = popen("resize 2>/dev/null", "r");
		if (fs != NULL) {
			char buf[72];
			// Read up to a full buffer; the size must be that of the
			// buffer, not of the pointer receiving fgets()'s result
			char *columns = fgets(buf, sizeof(buf), fs);
			pclose(fs);
			if (columns && strncmp(columns, "COLUMNS=", 8) == 0) {
				width = atoi(&columns[8]);
				if ((width < 1) || (width > 10000000)) {
					width = 80;
				}
			}
		}
	} else {
		// Not a tty, so allow a really long line
		width = 10000000;
	}
	if (width < 32) {
		width = 32;
	}
	return width;
}
// Get the PID command name field from /proc/PID/status file. Return
// pointer to a static buffer, expecting caller to immediately copy result.
// Returns NULL if the file cannot be opened or has no "Name:" line.
char *command_name_for_pid(int pid)
{
	static char buf[SMALL_BUF_SIZE];
	char fname[64];
	snprintf(fname, sizeof(fname), "/proc/%d/status", pid);
	FILE *fs = fopen(fname, "r");
	if (!fs) {
		return NULL;
	}
	char *name = NULL;
	while (fgets(buf, SMALL_BUF_SIZE, fs)) {
		if (strncmp(buf, "Name:", 5) == 0) {
			// Skip the whitespace between the field tag and the name
			name = &buf[5];
			while (isspace((unsigned char)*name)) {
				name++;
			}
			// Drop the trailing newline, taking care with an empty name
			size_t len = strlen(name);
			if ((len > 0) && (name[len - 1] == '\n')) {
				name[len - 1] = '\0';
			}
			break;
		}
	}
	fclose(fs);
	return name;
}
// Build and display a per-node table from the file named "file" found in
// each /sys/devices/system/node/node<N>/ directory.
//   file         - file name within each node directory
//   meminfo      - row descriptions (index, token, label)
//   meminfo_rows - number of entries in meminfo
//   tok_offset   - number of leading tokens on each line to skip before the
//                  category name (the value follows the name)
// Unless in compatibility mode, values are converted to MBs and a "Total"
// column header is shown. Exits the program if a node file cannot be
// opened.
void show_info_from_system_file(char *file, meminfo_p meminfo, int meminfo_rows, int tok_offset)
{
	enum { MAX_TOKENS = 64 };
	// Setup and init table
	vtab_t table;
	int header_rows = 2 - compatibility_mode;
	int header_cols = 1;
	// Add an extra data column for a total column
	init_table(&table, header_rows, header_cols, meminfo_rows, num_nodes + 1);
	int total_col_ix = header_cols + num_nodes;
	// Insert token mapping in hash table and assign left header column label for each row in table
	init_hash_table();
	for (int row = 0; (row < meminfo_rows); row++) {
		hash_insert(meminfo[row].token, meminfo[row].index);
		if (compatibility_mode) {
			string_assign(&table, (header_rows + row), 0, meminfo[row].token);
		} else {
			string_assign(&table, (header_rows + row), 0, meminfo[row].label);
		}
	}
	// printf("There are %d table hash collisions.\n", hash_collisions);
	// Set left header column width and left justify it
	set_col_width(&table, 0, 16);
	set_col_justification(&table, 0, COL_JUSTIFY_LEFT);
	// Open /sys/devices/system/node/node?/<file> for each node and store data
	// in table. If not compatibility_mode, do approximately first third of
	// this loop also for (node_ix == num_nodes) to get "Total" column header.
	for (int node_ix = 0; (node_ix < (num_nodes + (1 - compatibility_mode))); node_ix++) {
		int col = header_cols + node_ix;
		// Assign header row label and horizontal line for this column...
		string_assign(&table, 0, col, node_header[node_ix]);
		if (!compatibility_mode) {
			repchar_assign(&table, 1, col, '-');
			int decimal_places = 2;
			if (compress_display) {
				decimal_places = 0;
			}
			set_col_decimal_places(&table, col, decimal_places);
		}
		// Set column width and right justify data
		set_col_width(&table, col, 16);
		set_col_justification(&table, col, COL_JUSTIFY_RIGHT);
		if (node_ix == num_nodes) {
			break;
		}
		// Open /sys/.../node<N>/<file> for this node...
		char buf[SMALL_BUF_SIZE];
		char fname[64];
		snprintf(fname, sizeof(fname), "/sys/devices/system/node/node%d/%s", node_ix_map[node_ix], file);
		FILE *fs = fopen(fname, "r");
		if (!fs) {
			snprintf(buf, sizeof(buf), "cannot open %s", fname);
			perror(buf);
			exit(EXIT_FAILURE);
		}
		// Get table values for this node...
		while (fgets(buf, SMALL_BUF_SIZE, fs)) {
			char *tok[MAX_TOKENS];
			int tokens = 0;
			const char *delimiters = " \t\r\n:";
			// Never store more tokens than tok[] can hold
			for (char *p = strtok(buf, delimiters);
			     (p != NULL) && (tokens < MAX_TOKENS);
			     p = strtok(NULL, delimiters)) {
				tok[tokens++] = p;
			}
			// Skip blank lines, and lines too short to hold both a
			// category name and a value
			if (tokens < (tok_offset + 2)) {
				continue;
			}
			// example line from numastat file: "numa_miss 16463"
			// example line from meminfo file: "Node 3 Inactive: 210680 kB"
			int index = hash_lookup(tok[0 + tok_offset]);
			if (index < 0) {
				printf("Token %s not in hash table.\n", tok[0 + tok_offset]);
			} else {
				double value = (double)atol(tok[1 + tok_offset]);
				if (!compatibility_mode) {
					double multiplier = 1.0;
					if (tokens < 4) {
						multiplier = page_size_in_bytes;
					} else if (!strncmp("HugePages", tok[2], 9)) {
						multiplier = huge_page_size_in_bytes;
					} else if ((tokens > 4) && !strncmp("kB", tok[4], 2)) {
						// Only look at the units token when it exists
						multiplier = KILOBYTE;
					}
					value *= multiplier;
					value /= (double)MEGABYTE;
				}
				double_assign(&table, header_rows + index, col, value);
				double_addto(&table, header_rows + index, total_col_ix, value);
			}
		}
		fclose(fs);
	}
	// Compress display column widths, if requested
	if (compress_display) {
		for (int col = 0; (col < header_cols + num_nodes + 1); col++) {
			auto_set_col_width(&table, col, 4, 16);
		}
	}
	// Optionally sort the table data
	if (sort_table) {
		int sort_col;
		if ((sort_table_node < 0) || (sort_table_node >= num_nodes)) {
			sort_col = total_col_ix;
		} else {
			sort_col = header_cols + node_ix_map[sort_table_node];
			// NOTE(review): node_ix_map holds node numbers, which need not
			// be contiguous, so this can name a column that does not
			// exist; fall back to the total column in that case.
			if ((sort_col < header_cols) || (sort_col > total_col_ix)) {
				sort_col = total_col_ix;
			}
		}
		sort_rows_descending_by_col(&table, header_rows, header_rows + meminfo_rows - 1, sort_col);
	}
	// Actually display the table now, doing line-folding as necessary
	display_table(&table, screen_width, 0, 0, show_zero_data, show_zero_data);
	free_table(&table);
}
// Display the per-node table built from each node's "numastat" file. The
// title line is omitted in compatibility mode.
void show_numastat_info(void)
{
	int rows = NUMASTAT_MEMINFO_ROWS;
	if (compatibility_mode == 0) {
		fputs("\nPer-node numastat info (in MBs):\n", stdout);
	}
	show_info_from_system_file("numastat", numastat_meminfo, rows, 0);
}
// Display the per-node table built from each node's "meminfo" file. Two
// leading tokens ("Node <N>") precede the category name on each line.
void show_system_info(void)
{
	int rows = SYSTEM_MEMINFO_ROWS;
	fputs("\nPer-node system memory usage (in MBs):\n", stdout);
	show_info_from_system_file("meminfo", system_meminfo, rows, 2);
}
// Return the position of node number node_num within node_ix_map[], or -1
// if that node is not in the map.
static int node_ix_for_node_num(int node_num)
{
	for (int ix = 0; (ix < num_nodes); ix++) {
		if (node_ix_map[ix] == node_num) {
			return ix;
		}
	}
	return -1;
}

// Display per-node memory usage (in MBs) for the PIDs in pid_array, read
// from /proc/<PID>/numa_maps. In verbose mode, or when there is only one
// PID, a separate table with one row per memory category is shown for each
// PID; otherwise a single table with one row per PID is shown. Both forms
// end with a "Total" row and column. A PID whose numa_maps cannot be
// opened is reported and contributes nothing; a read error or a malformed
// node value exits the program.
void show_process_info(void)
{
	vtab_t table;
	int header_rows = 2;
	int header_cols = 1;
	int data_rows;
	int show_sub_categories = (verbose || (num_pids == 1));
	if (show_sub_categories) {
		data_rows = PROCESS_MEMINFO_ROWS;
	} else {
		data_rows = num_pids;
	}
	// Add two extra rows for a horizontal rule followed by a total row
	// Add one extra data column for a total column
	init_table(&table, header_rows, header_cols, data_rows + 2, num_nodes + 1);
	int total_col_ix = header_cols + num_nodes;
	int total_row_ix = header_rows + data_rows + 1;
	string_assign(&table, total_row_ix, 0, "Total");
	if (show_sub_categories) {
		// Assign left header column label for each row in table
		for (int row = 0; (row < (int)PROCESS_MEMINFO_ROWS); row++) {
			string_assign(&table, (header_rows + row), 0, process_meminfo[row].label);
		}
	} else {
		string_assign(&table, 0, 0, "PID");
		repchar_assign(&table, 1, 0, '-');
		printf("\nPer-node process memory usage (in MBs)\n");
	}
	// Set left header column width and left justify it
	set_col_width(&table, 0, 16);
	set_col_justification(&table, 0, COL_JUSTIFY_LEFT);
	// Set up "Node <N>" column headers over data columns, plus "Total" column
	for (int node_ix = 0; (node_ix <= num_nodes); node_ix++) {
		int col = header_cols + node_ix;
		// Assign header row label and horizontal line for this column...
		string_assign(&table, 0, col, node_header[node_ix]);
		repchar_assign(&table, 1, col, '-');
		// Set column width, decimal places, and right justify data
		set_col_width(&table, col, 16);
		int decimal_places = 2;
		if (compress_display) {
			decimal_places = 0;
		}
		set_col_decimal_places(&table, col, decimal_places);
		set_col_justification(&table, col, COL_JUSTIFY_RIGHT);
	}
	// Initialize data in table to all zeros
	zero_table_data(&table, CELL_TYPE_DOUBLE);
	// If (show_sub_categories), show individual process tables for each PID,
	// Otherwise show one big table of process total lines from all the PIDs.
	for (int pid_ix = 0; (pid_ix < num_pids); pid_ix++) {
		int pid = pid_array[pid_ix];
		if (show_sub_categories) {
			printf("\nPer-node process memory usage (in MBs) for PID %d (%s)\n", pid, command_name_for_pid(pid));
			if (pid_ix > 0) {
				// Re-initialize show_sub_categories table, because we re-use it for each PID.
				zero_table_data(&table, CELL_TYPE_DOUBLE);
			}
		} else {
			// Put this row's "PID (cmd)" label in left header column for this PID total row
			char tmp_buf[64];
			snprintf(tmp_buf, sizeof(tmp_buf), "%d (%s)", pid, command_name_for_pid(pid));
			char *p = strdup(tmp_buf);
			if (p == NULL) {
				perror("malloc failed line: " STRINGIFY(__LINE__));
				exit(EXIT_FAILURE);
			}
			string_assign(&table, header_rows + pid_ix, 0, p);
			set_cell_flag(&table, header_rows + pid_ix, 0, CELL_FLAG_FREEABLE);
		}
		// Open numa_map for this PID to get per-node data
		char fname[64];
		snprintf(fname, sizeof(fname), "/proc/%d/numa_maps", pid);
		char buf[BUF_SIZE];
		FILE *fs = fopen(fname, "r");
		int have_maps = (fs != NULL);
		if (!have_maps) {
			snprintf(buf, sizeof(buf), "Can't read /proc/%d/numa_maps", pid);
			perror(buf);
		} else {
			// Add up sub-category memory used from each node. Must go line by line
			// through the numa_map figuring out which category memory, node, and the
			// amount.
			while (fgets(buf, BUF_SIZE, fs)) {
				int category = PROCESS_PRIVATE_INDEX; // init category to the catch-all...
				const char *delimiters = " \t\r\n";
				char *p = strtok(buf, delimiters);
				while (p) {
					// If the memory category for this line is still the catch-all
					// (i.e. private), then see if the current token is a special
					// keyword for a specific memory sub-category.
					if (category == PROCESS_PRIVATE_INDEX) {
						for (int ix = 0; (ix < PROCESS_PRIVATE_INDEX); ix++) {
							if (!strncmp(p, process_meminfo[ix].token, strlen(process_meminfo[ix].token))) {
								category = ix;
								break;
							}
						}
					}
					// If the current token is a per-node pages quantity, parse the
					// node number and accumulate the number of pages in the specific
					// category (and also add to the total).
					if (p[0] == 'N') {
						int node_num = (int)strtol(&p[1], &p, 10);
						if (p[0] != '=') {
							perror("node value parse error");
							exit(EXIT_FAILURE);
						}
						double value = (double)strtol(&p[1], &p, 10);
						double multiplier = page_size_in_bytes;
						if (category == PROCESS_HUGE_INDEX) {
							multiplier = huge_page_size_in_bytes;
						}
						value *= multiplier;
						value /= (double)MEGABYTE;
						// Add value to data cell, total_col, and total_row
						int tmp_row;
						if (show_sub_categories) {
							tmp_row = header_rows + category;
						} else {
							tmp_row = header_rows + pid_ix;
						}
						// Don't assume nodes are sequential or contiguous.
						// Need to find correct tmp_col from node_ix_map. A node
						// number that is not in the map has no column, so its
						// value is dropped rather than searched for past the
						// end of the map.
						int node_ix = node_ix_for_node_num(node_num);
						if (node_ix >= 0) {
							int tmp_col = header_cols + node_ix;
							double_addto(&table, tmp_row, tmp_col, value);
							double_addto(&table, tmp_row, total_col_ix, value);
							double_addto(&table, total_row_ix, tmp_col, value);
							double_addto(&table, total_row_ix, total_col_ix, value);
						}
					}
					// Get next token on the line
					p = strtok(NULL, delimiters);
				}
			}
			// Currently, a non-root user can open some numa_map files successfully
			// without error, but can't actually read the contents -- despite the
			// 444 file permissions. So, use ferror() to check here to see if we
			// actually got a read error, and if so, alert the user so they know
			// not to trust the zero in the table.
			if (ferror(fs)) {
				snprintf(buf, sizeof(buf), "Can't read /proc/%d/numa_maps", pid);
				perror(buf);
				exit(EXIT_FAILURE);
			}
			fclose(fs);
		}
		// An individual table is shown only for a PID whose numa_maps could
		// be opened. The combined table is shown once the last PID has been
		// handled, even if that PID's numa_maps could not be opened, so the
		// data gathered from the earlier PIDs is not lost.
		int is_last_pid = (pid_ix + 1 == num_pids);
		if ((show_sub_categories && have_maps) || (!show_sub_categories && is_last_pid)) {
			// Compress display column widths, if requested
			if (compress_display) {
				for (int col = 0; (col < header_cols + num_nodes + 1); col++) {
					auto_set_col_width(&table, col, 4, 16);
				}
			} else {
				// Since not compressing the display, allow the left header
				// column to be wider. Otherwise, sometimes process command
				// name instance numbers can be truncated in an annoying way.
				auto_set_col_width(&table, 0, 16, 24);
			}
			// Put dashes above Total line...
			set_row_flag(&table, total_row_ix - 1, ROW_FLAG_ALWAYS_SHOW);
			for (int col = 0; (col < header_cols + num_nodes + 1); col++) {
				repchar_assign(&table, total_row_ix - 1, col, '-');
			}
			// Optionally sort the table data
			if (sort_table) {
				int sort_col;
				if ((sort_table_node < 0) || (sort_table_node >= num_nodes)) {
					sort_col = total_col_ix;
				} else {
					sort_col = header_cols + node_ix_map[sort_table_node];
					// NOTE(review): node_ix_map holds node numbers, which
					// need not be contiguous, so this can name a column that
					// does not exist; fall back to the total column then.
					if ((sort_col < header_cols) || (sort_col > total_col_ix)) {
						sort_col = total_col_ix;
					}
				}
				sort_rows_descending_by_col(&table, header_rows, header_rows + data_rows - 1, sort_col);
			}
			// Actually show the table
			display_table(&table, screen_width, 0, 0, show_zero_data, show_zero_data);
		}
	} // END OF FOR_EACH-PID loop
	free_table(&table);
} // show_process_info()
// scandir() filter: return 1 if the directory entry name is "node"
// followed by one or more decimal digits and nothing else, otherwise 0.
int node_and_digits(const struct dirent *dptr)
{
	const char *p = dptr->d_name;
	if (strncmp(p, "node", 4) != 0) {
		return 0;
	}
	p += 4;
	// At least one digit is required
	if (*p == '\0') {
		return 0;
	}
	for (; *p != '\0'; p++) {
		// isdigit() requires a value representable as unsigned char
		if (!isdigit((unsigned char)*p)) {
			return 0;
		}
	}
	return 1;
}
// Discover the NUMA nodes listed in /sys/devices/system/node and set up:
//   num_nodes   - the number of node<N> directories found
//   node_ix_map - their node numbers, in increasing order
//   node_header - one column label per node ("node<N>" in compatibility
//                 mode, otherwise "Node <N>"), followed by "Total"
// Exits the program if no node directory is found or memory runs out.
void init_node_ix_map_and_header(int compatibility_mode)
{
	// Count directory names of the form: /sys/devices/system/node/node<N>
	struct dirent **entries;
	num_nodes = scandir("/sys/devices/system/node", &entries, node_and_digits, NULL);
	if (num_nodes < 1) {
		if (compatibility_mode) {
			perror("sysfs not mounted or system not NUMA aware");
		} else {
			perror("Couldn't open /sys/devices/system/node");
		}
		exit(EXIT_FAILURE);
	}
	node_ix_map = malloc(num_nodes * sizeof(int));
	if (node_ix_map == NULL) {
		perror("malloc failed line: " STRINGIFY(__LINE__));
		exit(EXIT_FAILURE);
	}
	// For each "node<N>" filename present, save <N> in node_ix_map
	for (int ix = 0; ix < num_nodes; ix++) {
		struct dirent *entry = entries[ix];
		node_ix_map[ix] = atoi(&entry->d_name[4]);
		free(entry);
	}
	free(entries);
	// Put the node numbers in increasing order. A simple insertion sort is
	// enough for what is expected to be a short, possibly pre-ordered list.
	for (int ix = 1; ix < num_nodes; ix++) {
		int node = node_ix_map[ix];
		int hole = ix;
		while ((hole > 0) && (node_ix_map[hole - 1] > node)) {
			node_ix_map[hole] = node_ix_map[hole - 1];
			hole--;
		}
		node_ix_map[hole] = node;
	}
	// Construct vector of "Node <N>" and "Total" column headers. Allocate
	// one for each NUMA node, plus one on the end for the "Total" column
	node_header = malloc((num_nodes + 1) * sizeof(char *));
	if (node_header == NULL) {
		perror("malloc failed line: " STRINGIFY(__LINE__));
		exit(EXIT_FAILURE);
	}
	for (int node_ix = 0; node_ix <= num_nodes; node_ix++) {
		char node_label[64];
		if (node_ix < num_nodes) {
			if (compatibility_mode) {
				snprintf(node_label, sizeof(node_label), "node%d", node_ix_map[node_ix]);
			} else {
				snprintf(node_label, sizeof(node_label), "Node %d", node_ix_map[node_ix]);
			}
		} else {
			strcpy(node_label, "Total");
		}
		node_header[node_ix] = strdup(node_label);
		if (node_header[node_ix] == NULL) {
			perror("malloc failed line: " STRINGIFY(__LINE__));
			exit(EXIT_FAILURE);
		}
	}
}
// Release node_ix_map and node_header (including each header string) and
// reset both pointers to NULL.
void free_node_ix_map_and_header(void)
{
	// free(NULL) is a no-op, so the map needs no NULL check
	free(node_ix_map);
	node_ix_map = NULL;
	if (node_header == NULL) {
		return;
	}
	// There is one header per node plus the trailing "Total" header
	for (int ix = num_nodes; ix >= 0; ix--) {
		free(node_header[ix]);
	}
	free(node_header);
	node_header = NULL;
}
// Return the huge page size in bytes, computed from the "Hugepagesize"
// line of /proc/meminfo (whose value is multiplied by KILOBYTE). Returns 0
// if that line is absent or has no number. Exits the program if
// /proc/meminfo cannot be opened.
double get_huge_page_size_in_bytes(void)
{
	double huge_page_size = 0;
	FILE *fs = fopen("/proc/meminfo", "r");
	if (!fs) {
		perror("Can't open /proc/meminfo");
		exit(EXIT_FAILURE);
	}
	char buf[SMALL_BUF_SIZE];
	while (fgets(buf, SMALL_BUF_SIZE, fs)) {
		if (!strncmp("Hugepagesize", buf, 12)) {
			// Skip to the first digit, but never past the end of the line
			char *p = &buf[12];
			while ((*p != '\0') && (!isdigit((unsigned char)*p))) {
				p++;
			}
			huge_page_size = strtod(p, NULL);
			break;
		}
	}
	fclose(fs);
	return huge_page_size * KILOBYTE;
}
// Return 1 if every character of p is a decimal digit, otherwise 0. A NULL
// pointer gives 0; an empty string gives 1 (there is no non-digit in it).
int all_digits(char *p)
{
	if (p == NULL) {
		return 0;
	}
	for (; *p != '\0'; p++) {
		// isdigit() requires a value representable as unsigned char
		if (!isdigit((unsigned char)*p)) {
			return 0;
		}
	}
	return 1;
}
// scandir() filter: return non-zero if the directory entry name begins
// with a decimal digit, otherwise 0.
int starts_with_digit(const struct dirent *dptr)
{
	// isdigit() requires a value representable as unsigned char
	return (isdigit((unsigned char)dptr->d_name[0]) != 0);
}
// Append pid to pid_array, growing the array when it is full (64 entries
// at first, then doubling). If the array cannot be grown, the PID is
// skipped with an error message and the existing list is left untouched.
void add_pid_to_list(int pid)
{
	if (num_pids >= pid_array_max_pids) {
		int new_max = (pid_array_max_pids == 0) ? 64 : (2 * pid_array_max_pids);
		int *tmp_int_ptr = realloc(pid_array, (size_t)new_max * sizeof(int));
		if (tmp_int_ptr == NULL) {
			char buf[SMALL_BUF_SIZE];
			snprintf(buf, sizeof(buf), "Too many PIDs, skipping %d", pid);
			perror(buf);
			return;
		}
		// Record the new capacity only once the larger array really
		// exists, so a failed allocation cannot leave the capacity ahead
		// of the storage
		pid_array = tmp_int_ptr;
		pid_array_max_pids = new_max;
	}
	pid_array[num_pids++] = pid;
}
// qsort() comparator ordering ints ascending. Returns a negative value,
// zero, or a positive value when *p1 is less than, equal to, or greater
// than *p2.
int ascending(const void *p1, const void *p2)
{
	int a = *(const int *)p1;
	int b = *(const int *)p2;
	// Compare rather than subtract: a - b can overflow for distant values
	return (a > b) - (a < b);
}
// Sort pid_array into ascending order and squeeze out repeated PIDs,
// updating num_pids to the number of distinct PIDs kept.
void sort_pids_and_remove_duplicates(void)
{
	if (num_pids <= 1) {
		return;
	}
	qsort(pid_array, num_pids, sizeof(int), ascending);
	// After sorting, duplicates are adjacent: keep a PID only when it
	// differs from the last one kept
	int kept = 1;
	for (int scan = 1; scan < num_pids; scan++) {
		if (pid_array[scan] != pid_array[kept - 1]) {
			pid_array[kept] = pid_array[scan];
			kept++;
		}
	}
	num_pids = kept;
}
// Search all /proc/<PID>/cmdline files and /proc/<PID>/status:Name fields
// for the given pattern (plain substring match) and add every matching PID,
// except this process's own, to the PID list. Prints a notice when nothing
// matched. A NULL pattern or an unreadable /proc adds nothing.
void add_pids_from_pattern_search(char *pattern)
{
	if (pattern == NULL) {
		return;
	}
	int num_matches_found = 0;
	struct dirent **namelist;
	int files = scandir("/proc", &namelist, starts_with_digit, NULL);
	if (files < 0) {
		// namelist is not set on failure, so it must not be freed.
		perror("Couldn't open /proc");
		return;
	}
	for (int ix = 0; (ix < files); ix++) {
		char buf[BUF_SIZE];
		// First get Name field from status file (bounded copy, the
		// returned string's length is not known here).
		int pid = atoi(namelist[ix]->d_name);
		char *name = command_name_for_pid(pid);
		snprintf(buf, sizeof(buf), "%s", name ? name : "");
		// Next copy cmdline file contents onto end of buffer. Do it a
		// character at a time to convert nulls to spaces.
		char fname[272];
		snprintf(fname, sizeof(fname), "/proc/%s/cmdline", namelist[ix]->d_name);
		FILE *fs = fopen(fname, "r");
		if (fs) {
			char *p = buf + strlen(buf);
			// Last position that may hold a character; one byte is
			// reserved for the terminator.
			char *limit = buf + BUF_SIZE - 1;
			if (p < limit) {
				*p++ = ' ';
			}
			int c;
			while ((p < limit) && ((c = fgetc(fs)) != EOF)) {
				*p++ = (c == '\0') ? ' ' : (char)c;
			}
			*p = '\0';
			fclose(fs);
		}
		if (strstr(buf, pattern) && (pid != getpid())) {
			add_pid_to_list(pid);
			num_matches_found += 1;
		}
		free(namelist[ix]);
	}
	free(namelist);
	if (num_matches_found == 0) {
		printf("Found no processes containing pattern: \"%s\"\n", pattern);
	}
}
// Program entry point: parse the command line, collect the process
// specifiers, then print the requested process, system and numastat tables.
int main(int argc, char **argv)
{
	static struct option long_options[] = {
		{"help", 0, 0, '?'},
		{0, 0, 0, 0}
	};
	int want_system_info = 0;
	int want_numastat_info = 0;
	int long_ix = 0;
	int ch;

	prog_name = argv[0];
	for (;;) {
		ch = getopt_long(argc, argv, "cmnp:s::vVz?", long_options, &long_ix);
		if (ch == -1) {
			break;
		}
		switch (ch) {
		case 'c':
			compress_display = 1;
			break;
		case 'm':
			want_system_info = 1;
			break;
		case 'n':
			want_numastat_info = 1;
			break;
		case 'p':
			// all_digits() already rejects NULL, so no separate check.
			if (all_digits(optarg)) {
				add_pid_to_list(atoi(optarg));
			} else {
				add_pids_from_pattern_search(optarg);
			}
			break;
		case 's':
			sort_table = 1;
			// The node number is optional; keep the current
			// sort_table_node unless a purely numeric value was given.
			if (all_digits(optarg)) {
				sort_table_node = atoi(optarg);
			}
			break;
		case 'v':
			verbose = 1;
			break;
		case 'V':
			display_version_and_exit();
			break;
		case 'z':
			show_zero_data = 0;
			break;
		case 0:
			printf("Unexpected long option %s", long_options[long_ix].name);
			if (optarg) {
				printf(" with arg %s", optarg);
			}
			printf("\n");
			display_usage_and_exit();
			break;
		case '?':
		default:
			display_usage_and_exit();
			break;
		}
	}

	// The display width drives table formatting and limits the number of
	// output columns per row
	screen_width = get_screen_width();

	// Every remaining argument is an additional process specifier:
	// either a PID or a pattern to search for
	for (; optind < argc; optind++) {
		char *spec = argv[optind];
		if (all_digits(spec)) {
			add_pid_to_list(atoi(spec));
		} else {
			add_pids_from_pattern_search(spec);
		}
	}

	// With no options or arguments at all, be extremely compatible with
	// the old numastat perl script (included at the end of this file for
	// reference)
	compatibility_mode = (argc == 1);
	init_node_ix_map_and_header(compatibility_mode);	// enumerate the NUMA nodes
	if (compatibility_mode) {
		show_numastat_info();
		free_node_ix_map_and_header();
		exit(EXIT_SUCCESS);
	}

	// Figure out page sizes
	page_size_in_bytes = (double)sysconf(_SC_PAGESIZE);
	huge_page_size_in_bytes = get_huge_page_size_in_bytes();

	// Display the info for the process specifiers
	if (num_pids > 0) {
		sort_pids_and_remove_duplicates();
		show_process_info();
	}
	// free(NULL) is a no-op, so no guard is needed
	free(pid_array);

	// Display the system-wide memory usage info
	if (want_system_info) {
		show_system_info();
	}

	// Display the numastat statistics when asked for explicitly, or as
	// the fallback when nothing else was requested
	int nothing_else_requested = !((num_pids != 0) || want_system_info);
	if (want_numastat_info || nothing_else_requested) {
		show_numastat_info();
	}
	free_node_ix_map_and_header();
	exit(EXIT_SUCCESS);
}
#if 0
/*
#!/usr/bin/perl
# Print numa statistics for all nodes
# Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
#
# numastat is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; version
# 2.
#
# numastat is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should find a copy of v2 of the GNU General Public License somewhere
# on your Linux system; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Example: NUMASTAT_WIDTH=80 watch -n1 numastat
#
# output width
$WIDTH=80;
if (defined($ENV{'NUMASTAT_WIDTH'})) {
$WIDTH=$ENV{'NUMASTAT_WIDTH'};
} else {
use POSIX;
if (POSIX::isatty(fileno(STDOUT))) {
if (open(R, "resize |")) {
while (<R>) {
$WIDTH=$1 if /COLUMNS=(\d+)/;
}
close R;
}
} else {
# don't split it up for easier parsing
$WIDTH=10000000;
}
}
$WIDTH = 32 if $WIDTH < 32;
if (! -d "/sys/devices/system/node" ) {
print STDERR "sysfs not mounted or system not NUMA aware\n";
exit 1;
}
%stat = ();
$title = "";
$mode = 0;
opendir(NODES, "/sys/devices/system/node") || exit 1;
foreach $nd (readdir(NODES)) {
next unless $nd =~ /node(\d+)/;
# On newer kernels, readdir may enumerate the 'node(\d+) subdirs
# in opposite order from older kernels--e.g., node{0,1,2,...}
# as opposed to node{N,N-1,N-2,...}. Accommodate this by
# switching to new mode so that the stats get emitted in
# the same order.
#print "readdir(NODES) returns $nd\n";
if (!$title && $nd =~ /node0/) {
$mode = 1;
}
open(STAT, "/sys/devices/system/node/$nd/numastat") ||
die "cannot open $nd: $!\n";
if (! $mode) {
$title = sprintf("%16s",$nd) . $title;
} else {
$title = $title . sprintf("%16s",$nd);
}
@fields = ();
while (<STAT>) {
($name, $val) = split;
if (! $mode) {
$stat{$name} = sprintf("%16u", $val) . $stat{$name};
} else {
$stat{$name} = $stat{$name} . sprintf("%16u", $val);
}
push(@fields, $name);
}
close STAT;
}
closedir NODES;
$numfields = int(($WIDTH - 16) / 16);
$l = 16 * $numfields;
for ($i = 0; $i < length($title); $i += $l) {
print "\n" if $i > 0;
printf "%16s%s\n","",substr($title,$i,$l);
foreach (@fields) {
printf "%-16s%s\n",$_,substr($stat{$_},$i,$l);
}
}
*/
#endif
0707010000002A000081A400003EA600000032000000016127204300000839000000000000000000000000000000000000002700000000numactl-2.0.14.20.g4ee5e0c/rtnetlink.c/* Simple LPGLed rtnetlink library */
#include <sys/socket.h>
#include <linux/rtnetlink.h>
#include <linux/netlink.h>
#include <netinet/in.h>
#include <errno.h>
#include <unistd.h>
#define hidden __attribute__((visibility("hidden")))
#include "rtnetlink.h"
/*
 * Append a routing attribute header of the given type, sized for a payload
 * of len bytes, at the aligned end of message m and grow m->nlmsg_len to
 * cover it. Returns a pointer to the attribute's payload area, which the
 * caller fills in. No check is made that the buffer behind m is large
 * enough.
 */
hidden void *rta_put(struct nlmsghdr *m, int type, int len)
{
	const unsigned int aligned_end = NLMSG_ALIGN(m->nlmsg_len);
	const int attr_len = RTA_LENGTH(len);
	struct rtattr *attr = (struct rtattr *)((char *)m + aligned_end);

	attr->rta_type = type;
	attr->rta_len = attr_len;
	m->nlmsg_len = aligned_end + RTA_ALIGN(attr_len);
	return RTA_DATA(attr);
}
/*
 * Iterate over the routing attributes of message m.
 * With p == NULL, return the attribute located at the aligned byte offset
 * from the start of m (no validity check is made in that case).
 * Otherwise return the attribute following p, or NULL when RTA_OK rejects
 * it.
 * NOTE(review): RTA_NEXT is handed m->nlmsg_len as its length argument;
 * verify against linux/rtnetlink.h whether the macro updates that field.
 */
hidden struct rtattr *rta_get(struct nlmsghdr *m, struct rtattr *p, int offset)
{
	struct rtattr *next;

	if (!p)
		return (struct rtattr *)((char *)m + NLMSG_ALIGN(offset));

	next = RTA_NEXT(p, m->nlmsg_len);
	return RTA_OK(next, m->nlmsg_len) ? next : NULL;
}
/*
 * Append the network address held in adr to message msg as a routing
 * attribute of the given type: 4 bytes for AF_INET, 16 bytes for AF_INET6.
 * Returns 0 on success and -1 for any other address family.
 */
hidden int
rta_put_address(struct nlmsghdr *msg, int type, struct sockaddr *adr)
{
	const int family = adr->sa_family;

	if (family == AF_INET) {
		struct in_addr *dst = rta_put(msg, type, 4);
		*dst = ((struct sockaddr_in *)adr)->sin_addr;
		return 0;
	}
	if (family == AF_INET6) {
		struct in6_addr *dst6 = rta_put(msg, type, 16);
		*dst6 = ((struct sockaddr_in6 *)adr)->sin6_addr;
		return 0;
	}
	return -1;
}
/*
 * Send the request in msg to the netlink peer adr and read the reply back
 * into the same buffer (capacity buflen bytes).
 * Assumes the reply is not truncated, so the caller must make the buffer
 * large enough. Returns 0 on success; returns -1 with errno set when the
 * socket calls fail or when the reply is an NLMSG_ERROR message (errno is
 * then the negated error code carried by the reply).
 */
hidden int
rtnetlink_request(struct nlmsghdr *msg, int buflen, struct sockaddr_nl *adr)
{
	int sock;
	int rc;
	int saved_errno;

	/* A private, single-use socket avoids having to keep state for a
	   sequence number. */
	sock = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (sock < 0)
		return -1;

	rc = sendto(sock, msg, msg->nlmsg_len, 0, (struct sockaddr *)adr,
		    sizeof(*adr));
	if (rc >= 0) {
		socklen_t adrlen = sizeof(*adr);
		rc = recvfrom(sock, msg, buflen, 0, (struct sockaddr *)adr,
			      &adrlen);
	}

	/* close() may overwrite errno; keep the value from the transfer. */
	saved_errno = errno;
	close(sock);
	errno = saved_errno;
	if (rc < 0)
		return -1;

	/* Only a single reply is expected, which should be safe because
	   the socket is used for this one request only. */
	if (msg->nlmsg_type != NLMSG_ERROR)
		return 0;

	errno = -((struct nlmsgerr *)NLMSG_DATA(msg))->error;
	return -1;
}
0707010000002B000081A400003EA600000032000000016127204300000139000000000000000000000000000000000000002700000000numactl-2.0.14.20.g4ee5e0c/rtnetlink.hhidden int
rta_put_address(struct nlmsghdr *msg, int type, struct sockaddr *adr);
hidden struct rtattr *rta_get(struct nlmsghdr *m, struct rtattr *p, int offset);
hidden void *rta_put(struct nlmsghdr *m, int type, int len);
hidden int rtnetlink_request(struct nlmsghdr *msg, int buflen, struct sockaddr_nl *adr);
0707010000002C000081A400003EA600000032000000016127204300001F3A000000000000000000000000000000000000002100000000numactl-2.0.14.20.g4ee5e0c/shm.c/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
Manage shared memory policy for numactl.
The actual policy is set in numactl itself, this just sets up and maps
the shared memory segments and dumps them.
numactl is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; version
2.
numactl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should find a copy of v2 of the GNU General Public License somewhere
on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <errno.h>
#include <unistd.h>
#include "numa.h"
#include "numaif.h"
#include "numaint.h"
#include "util.h"
#include "shm.h"
/* Handle of the attached region: the value returned by shmget() in
   attach_sysvshm() or by open() in attach_shared(); -1 until then. */
int shmfd = -1;
/* Project id handed to ftok() when deriving the SysV key. */
long shmid = 0;
/* Start of the policy region in this process (shmat() result plus
   shmoffset, or the mmap64() result). */
char *shmptr;
/* Length of the policy region in bytes; 0 means "not specified". */
unsigned long long shmlen;
/* Permission bits used when a key file, segment or shared file is created. */
mode_t shmmode = 0600;
/* Offset of the policy region: added to the shmat() address, or passed as
   the file offset to mmap64(). */
unsigned long long shmoffset;
/* Extra flags passed to shmget(); SHM_HUGETLB selects the huge page size. */
int shmflags;
/* Page size used for the alignment check and as the step when walking the
   region; set by attach_sysvshm() and attach_shared(). */
static int shm_pagesize;
/*
 * Return the huge page size in bytes as reported by the "Hugepagesize"
 * line of /proc/meminfo. Falls back to getpagesize() when the file cannot
 * be opened, the line is missing, or the reported value is not positive.
 */
long huge_page_size(void)
{
	size_t len = 0;
	char *line = NULL;
	long result = 0;
	FILE *f = fopen("/proc/meminfo", "r");

	if (f != NULL) {
		while (getdelim(&line, &len, '\n', f) > 0) {
			int ps;
			if (sscanf(line, "Hugepagesize: %d kB", &ps) == 1) {
				/* Widen before multiplying so large page sizes
				   do not overflow int. */
				result = (long)ps * 1024;
				break;
			}
		}
		/* Reached on every path, so neither the line buffer nor the
		   stream leaks when a match is found. */
		free(line);
		fclose(f);
	}
	return (result > 0) ? result : getpagesize();
}
/*
 * Validate the policy region described by shmptr/shmlen before the option
 * named by opt is applied: both the start address and the length must be
 * multiples of shm_pagesize, and the length must be non-zero.
 * Prints a message and exits with status 1 on violation.
 */
static void check_region(char *opt)
{
	const int start_unaligned = ((unsigned long)shmptr % shm_pagesize) != 0;

	if (start_unaligned || (shmlen % shm_pagesize) != 0) {
		fprintf(stderr, "numactl: policy region not page aligned\n");
		exit(1);
	}
	if (shmlen == 0) {
		fprintf(stderr,
			"numactl: policy region length not specified before %s\n",
			opt);
		exit(1);
	}
}
/*
 * Derive the SysV IPC key for the file `name` and project id shmid.
 * When ftok() fails the key file is created with mode shmmode and the
 * lookup is retried; failures at that stage are reported through nerror().
 */
static key_t sysvkey(char *name)
{
	int fd;
	key_t key = ftok(name, shmid);

	if (key >= 0)
		return key;

	fprintf(stderr, "numactl: Creating shm key file %s mode %04o\n",
		name, shmmode);
	fd = creat(name, shmmode);
	if (fd < 0)
		nerror("cannot create key for shm %s\n", name);
	else
		/* Only the file's existence matters to ftok(); do not leak
		   the descriptor. */
		close(fd);
	key = ftok(name, shmid);
	if (key < 0)
		nerror("cannot get key for newly created shm key file %s",
		       name);
	return key;
}
/*
 * Attach a SysV style shared memory segment keyed by the file `name`.
 * The segment is created when it does not exist yet (this needs a
 * non-zero shmlen). On return shmptr addresses the region at shmoffset
 * inside the segment and the region has passed check_region(); `opt` is
 * only used in that check's error message.
 */
void attach_sysvshm(char *name, char *opt)
{
	const key_t key = sysvkey(name);
	struct shmid_ds seginfo;

	shmfd = shmget(key, shmlen, shmflags);
	if (shmfd < 0 && errno == ENOENT) {
		double megabytes;

		if (!shmlen)
			complain(
				"need a --length to create a sysv shared memory segment");
		megabytes = ((double)shmlen) / (1024*1024);
		fprintf(stderr,
			"numactl: Creating shared memory segment %s id %ld mode %04o length %.fMB\n",
			name, shmid, shmmode, megabytes);
		shmfd = shmget(key, shmlen, IPC_CREAT|shmmode|shmflags);
		if (shmfd < 0)
			nerror("cannot create shared memory segment");
	}

	/* No length given: take over the size of the existing segment. */
	if (!shmlen) {
		if (shmctl(shmfd, IPC_STAT, &seginfo) < 0)
			err("shmctl IPC_STAT");
		shmlen = seginfo.shm_segsz;
	}

	shmptr = shmat(shmfd, NULL, 0);
	if (shmptr == (void*)-1)
		err("shmat");
	shmptr += shmoffset;

	if (shmflags & SHM_HUGETLB)
		shm_pagesize = huge_page_size();
	else
		shm_pagesize = getpagesize();
	check_region(opt);
}
/* Attach a shared memory file. */
void attach_shared(char *name, char *opt)
{
struct stat64 st;
shmfd = open(name, O_RDWR);
if (shmfd < 0) {
errno = 0;
if (shmlen == 0)
complain("need a --length to create a shared file");
shmfd = open(name, O_RDWR|O_CREAT, shmmode);
if (shmfd < 0)
nerror("cannot create file %s", name);
}
if (fstat64(shmfd, &st) < 0)
err("shm stat");
if (shmlen > st.st_size) {
if (ftruncate64(shmfd, shmlen) < 0) {
/* XXX: we could do it by hand, but it would it
would be impossible to apply policy then.
need to fix that in the kernel. */
perror("ftruncate");
exit(1);
}
}
shm_pagesize = st.st_blksize;
check_region(opt);
/* RED-PEN For shmlen > address space may need to map in pieces.
Left for some poor 32bit soul. */
shmptr = mmap64(NULL, shmlen, PROT_READ | PROT_WRITE, MAP_SHARED, shmfd, shmoffset);
if (shmptr == (char*)-1)
err("shm mmap");
}
/*
 * Print one line describing the policy `pol` and node mask `mask` that
 * cover the region offsets [start, end); both are shown shifted by
 * shmoffset. Ranges under MPOL_DEFAULT are not printed.
 */
static void
dumppol(unsigned long long start, unsigned long long end, int pol, struct bitmask *mask)
{
	if (pol != MPOL_DEFAULT) {
		const unsigned long long first = shmoffset + start;
		const unsigned long long last = shmoffset + end;

		printf("%016llx-%016llx: %s ", first, last, policy_name(pol));
		printmask("", mask);
	}
}
/*
 * Dump policies in a shared memory segment.
 * Walks the region one shm_pagesize step at a time, queries the policy and
 * node mask of each page and prints one line (via dumppol) for every run of
 * consecutive pages that share the same policy and the same node mask.
 */
void dump_shm(void)
{
	struct bitmask *nodes, *prevnodes;
	int prevpol = -1, pol;
	unsigned long long c, start;

	start = 0;
	if (shmlen == 0) {
		printf("nothing to dump\n");
		return;
	}

	nodes = numa_allocate_nodemask();
	prevnodes = numa_allocate_nodemask();

	for (c = 0; c < shmlen; c += shm_pagesize) {
		if (get_mempolicy(&pol, nodes->maskp, nodes->size, c+shmptr,
				  MPOL_F_ADDR) < 0)
			err("get_mempolicy on shm");
		/* A run ends when either the policy or its node mask changes. */
		if (pol == prevpol && numa_bitmask_equal(nodes, prevnodes))
			continue;
		if (prevpol != -1)
			dumppol(start, c, prevpol, prevnodes);
		/* Keep a real copy of the mask. Merely pointing prevnodes at
		   `nodes` would let the next get_mempolicy() call overwrite
		   the mask of the run that is still waiting to be printed. */
		copy_bitmask_to_bitmask(nodes, prevnodes);
		prevpol = pol;
		start = c;
	}
	dumppol(start, c, prevpol, prevnodes);
	numa_free_nodemask(nodes);
	numa_free_nodemask(prevnodes);
}
/* Print the node number for the region offsets [start, end), both shown
   shifted by shmoffset. */
static void dumpnode(unsigned long long start, unsigned long long end, int node)
{
	const unsigned long long first = shmoffset + start;
	const unsigned long long last = shmoffset + end;

	printf("%016llx-%016llx: %d\n", first, last, node);
}
/*
 * Dump nodes in a shared memory segment: step through the region in
 * shm_pagesize increments, ask get_mempolicy() for the node of each page
 * and print one line per run of consecutive pages reporting the same node.
 */
void dump_shm_nodes(void)
{
	unsigned long long offset, run_start = 0;
	int run_node = -1, cur_node;

	if (!shmlen) {
		printf("nothing to dump\n");
		return;
	}

	for (offset = 0; offset < shmlen; offset += shm_pagesize) {
		if (get_mempolicy(&cur_node, NULL, 0, shmptr + offset,
				  MPOL_F_ADDR|MPOL_F_NODE) < 0)
			err("get_mempolicy on shm");
		if (cur_node != run_node) {
			/* -1 marks "no run started yet"; nothing to flush then. */
			if (run_node != -1)
				dumpnode(run_start, offset, run_node);
			run_node = cur_node;
			run_start = offset;
		}
	}
	dumpnode(run_start, offset, run_node);
}
/*
 * Report a verification problem at address ptr: prints the address and its
 * offset from shmptr, followed by the printf-style message, and sets
 * exitcode to 1.
 */
static void vwarn(char *ptr, char *fmt, ...)
{
	const unsigned long addr = (unsigned long)ptr;
	const unsigned long off = addr - (unsigned long)shmptr;
	va_list args;

	printf("numactl verify %lx(%lx): ", addr, off);
	va_start(args, fmt);
	vprintf(fmt, args);
	va_end(args);
	exitcode = 1;
}
/*
 * Return the node that follows cur in round-robin order among the bits set
 * in mask, wrapping around after the last possible node. If no bit is set
 * at all (or no nodes are possible), cur is returned unchanged.
 */
static unsigned interleave_next(unsigned cur, struct bitmask *mask)
{
	int numa_num_nodes = numa_num_possible_nodes();
	unsigned candidate = cur;
	int tries;

	if (numa_num_nodes <= 0)
		return cur;
	/* Wrap before every test so node 0 is considered right after the
	   last node, and stop after one full lap so an empty mask cannot
	   spin forever. */
	for (tries = 0; tries < numa_num_nodes; tries++) {
		candidate = (candidate + 1) % numa_num_nodes;
		if (numa_bitmask_isbitset(mask, candidate))
			return candidate;
	}
	return cur;
}
/*
 * Verify policy in a shared memory segment.
 * Every page of the region must report `policy` and a node mask equal to
 * `nodes`; in addition the node reported for each page is checked against
 * what the policy allows (round-robin order for MPOL_INTERLEAVE,
 * membership in the mask for MPOL_PREFERRED and MPOL_BIND).
 * Problems are reported through vwarn(), which sets exitcode. A wrong
 * policy or a broken interleave sequence ends the scan early.
 */
void verify_shm(int policy, struct bitmask *nodes)
{
	char *p;
	int ilnode = -1, node;
	int pol2;
	struct bitmask *nodes2;

	if (policy == MPOL_INTERLEAVE) {
		/* Node of the first page: starting point of the expected
		   interleave sequence. */
		if (get_mempolicy(&ilnode, NULL, 0, shmptr,
				  MPOL_F_ADDR|MPOL_F_NODE)
		    < 0)
			err("get_mempolicy");
	}

	nodes2 = numa_allocate_nodemask();

	for (p = shmptr; (unsigned long long)(p - shmptr) < shmlen; p += shm_pagesize) {
		if (get_mempolicy(&pol2, nodes2->maskp, nodes2->size, p,
				  MPOL_F_ADDR) < 0)
			err("get_mempolicy");
		if (pol2 != policy) {
			vwarn(p, "wrong policy %s, expected %s\n",
			      policy_name(pol2), policy_name(policy));
			goto out;
		}
		/* Compare the mask bits. A memcmp() on the two struct bitmask
		   objects would compare their size fields and data pointers
		   instead, and read past the structs for large masks. */
		if (!numa_bitmask_equal(nodes2, nodes)) {
			vwarn(p, "mismatched node mask\n");
			printmask("expected", nodes);
			printmask("real", nodes2);
		}

		if (get_mempolicy(&node, NULL, 0, p, MPOL_F_ADDR|MPOL_F_NODE) < 0)
			err("get_mempolicy");

		switch (policy) {
		case MPOL_INTERLEAVE:
			if (node < 0 || !numa_bitmask_isbitset(nodes2, node))
				vwarn(p, "interleave node out of range %d\n", node);
			if (node != ilnode) {
				vwarn(p, "expected interleave node %d, got %d\n",
				      ilnode,node);
				goto out;
			}
			ilnode = interleave_next(ilnode, nodes2);
			break;
		case MPOL_PREFERRED:
		case MPOL_BIND:
			if (!numa_bitmask_isbitset(nodes2, node)) {
				vwarn(p, "unexpected node %d\n", node);
				printmask("expected", nodes2);
			}
			break;
		case MPOL_DEFAULT:
			break;
		}
	}
out:
	numa_free_nodemask(nodes2);
}
0707010000002D000081A400003EA6000000320000000161272043000001A3000000000000000000000000000000000000002100000000numactl-2.0.14.20.g4ee5e0c/shm.h
extern int shmfd;
extern long shmid;
extern char *shmptr;
extern unsigned long long shmlen;
extern mode_t shmmode;
extern unsigned long long shmoffset;
extern int shmflags;
extern void dump_shm(void);
extern void dump_shm_nodes(void);
extern void attach_shared(char *, char *);
extern void attach_sysvshm(char *, char *);
extern void verify_shm(int policy, struct bitmask *);
/* in numactl.c */
extern int exitcode;
0707010000002E000081A400003EA6000000320000000161272043000018C0000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/stream_lib.c#include <stdio.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <sys/time.h>
#include <stdlib.h>
#include "stream_lib.h"
/* Current time from gettimeofday() as seconds, with the microsecond part
   as the fraction. */
static inline double mysecond(void)
{
	struct timeval now;
	double fraction;

	gettimeofday(&now, NULL);
	fraction = now.tv_usec * 1.e-6;
	return now.tv_sec + fraction;
}
/*
* Program: Stream
* Programmer: Joe R. Zagar
* Revision: 4.0-BETA, October 24, 1995
* Original code developed by John D. McCalpin
*
* This program measures memory transfer rates in MB/s for simple
* computational kernels coded in C. These numbers reveal the quality
* of code generation for simple uncacheable kernels as well as showing
* the cost of floating-point operations relative to memory accesses.
*
* INSTRUCTIONS:
*
* 1) Stream requires a good bit of memory to run. Adjust the
* value of 'N' (below) to give a 'timing calibration' of
* at least 20 clock-ticks. This will provide rate estimates
* that should be good to about 5% precision.
*
* Hacked by AK to be a library
*/
long N = 8000000;
#define NTIMES 10
#define OFFSET 0
/*
* 3) Compile the code with full optimization. Many compilers
* generate unreasonably bad code before the optimizer tightens
* things up. If the results are unreasonably good, on the
* other hand, the optimizer might be too smart for me!
*
* Try compiling with:
* cc -O stream_d.c second_wall.c -o stream_d -lm
*
* This is known to work on Cray, SGI, IBM, and Sun machines.
*
*
* 4) Mail the results to mccalpin@cs.virginia.edu
* Be sure to include:
* a) computer hardware model number and software revision
* b) the compiler flags
* c) all of the output from the test case.
* Thanks!
*
*/
int checktick(void);
# define HLINE "-------------------------------------------------------------\n"
# ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y))
# endif
# ifndef MAX
# define MAX(x,y) ((x)>(y)?(x):(y))
# endif
/* The three work arrays; stream_init() points them into the caller's
   memory block. */
static double *a, *b, *c;
/* Per-kernel timing statistics, indexed 0..3 in the order of label[];
   stream_init() resets them and stream_test() accumulates into them. */
static double rmstime[4] = { 0 }, maxtime[4] = {
0}, mintime[4] = {
FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
/* Row labels used in the result table printed by stream_test(). */
static char *label[4] = { "Copy: ", "Scale: ",
"Add: ", "Triad: "
};
/* Exported kernel names, in the same order as the results. */
char *stream_names[] = { "Copy","Scale","Add","Triad" };
/* Bytes moved per run of each kernel; filled in by stream_init(). */
static double bytes[4];
/* Non-zero enables the progress and result output made through Vprintf. */
int stream_verbose = 1;
/* printf() that only prints while stream_verbose is set. */
#define Vprintf(x...) do { if (stream_verbose) printf(x); } while(0)
/*
 * Initialise the three work arrays (a = 1.0, b = 2.0, c = 0.0), estimate
 * the clock granularity with checktick() and time one pass over `a`
 * (which doubles every element). The findings are reported through
 * Vprintf, i.e. only while stream_verbose is set.
 */
void stream_check(void)
{
	int quantum;
	int BytesPerWord;
	long j;	/* long, like N: an int index would overflow for huge arrays */
	double t;

	/* --- SETUP --- determine precision and check timing --- */
	Vprintf(HLINE);
	BytesPerWord = sizeof(double);
	Vprintf("This system uses %d bytes per DOUBLE PRECISION word.\n",
		BytesPerWord);
	Vprintf(HLINE);
	/* N is a long, so %ld is the matching conversion. */
	Vprintf("Array size = %ld, Offset = %d\n", N, OFFSET);
	Vprintf("Total memory required = %.1f MB.\n",
		(3 * N * BytesPerWord) / 1048576.0);
	Vprintf("Each test is run %d times, but only\n", NTIMES);
	Vprintf("the *best* time for each is used.\n");

	/* Get initial value for system clock. */
	for (j = 0; j < N; j++) {
		a[j] = 1.0;
		b[j] = 2.0;
		c[j] = 0.0;
	}
	Vprintf(HLINE);
	if ((quantum = checktick()) >= 1)
		Vprintf("Your clock granularity/precision appears to be "
			"%d microseconds.\n", quantum);
	else
		Vprintf("Your clock granularity appears to be "
			"less than one microsecond.\n");

	t = mysecond();
	for (j = 0; j < N; j++)
		a[j] = 2.0E0 * a[j];
	t = 1.0E6 * (mysecond() - t);

	Vprintf("Each test below will take on the order"
		" of %d microseconds.\n", (int) t);
	/* checktick() may report 0 (granularity below one microsecond);
	   count one-microsecond ticks then instead of dividing by zero. */
	Vprintf(" (= %d clock ticks)\n",
		(quantum >= 1) ? (int) (t / quantum) : (int) t);
	Vprintf("Increase the size of the arrays if this shows that\n");
	Vprintf("you are not getting at least 20 clock ticks per test.\n");
	Vprintf(HLINE);
	Vprintf("WARNING -- The above is only a rough guideline.\n");
	Vprintf("For best results, please be sure you know the\n");
	Vprintf("precision of your system timer.\n");
	Vprintf(HLINE);
}
/*
 * Run the four STREAM kernels (Copy, Scale, Add, Triad) NTIMES times each
 * over the arrays set up by stream_init(), update the timing statistics
 * and print the result table through Vprintf.
 * When res is non-NULL it receives the four rates, computed from the best
 * (minimum) time of each kernel, in the order of stream_names[].
 */
void stream_test(double *res)
{
	long j;	/* long, like N: an int index would overflow for huge arrays */
	int k;
	double scalar, times[4][NTIMES];

	/* --- MAIN LOOP --- repeat test cases NTIMES times --- */
	scalar = 3.0;
	for (k = 0; k < NTIMES; k++) {
		times[0][k] = mysecond();
		for (j = 0; j < N; j++)
			c[j] = a[j];
		times[0][k] = mysecond() - times[0][k];

		times[1][k] = mysecond();
		for (j = 0; j < N; j++)
			b[j] = scalar * c[j];
		times[1][k] = mysecond() - times[1][k];

		times[2][k] = mysecond();
		for (j = 0; j < N; j++)
			c[j] = a[j] + b[j];
		times[2][k] = mysecond() - times[2][k];

		times[3][k] = mysecond();
		for (j = 0; j < N; j++)
			a[j] = b[j] + scalar * c[j];
		times[3][k] = mysecond() - times[3][k];
	}

	/* --- SUMMARY --- */
	for (k = 0; k < NTIMES; k++) {
		for (j = 0; j < 4; j++) {
			rmstime[j] =
			    rmstime[j] + (times[j][k] * times[j][k]);
			mintime[j] = MIN(mintime[j], times[j][k]);
			maxtime[j] = MAX(maxtime[j], times[j][k]);
		}
	}

	Vprintf
	    ("Function Rate (MB/s) RMS time Min time Max time\n");
	for (j = 0; j < 4; j++) {
		double speed = 1.0E-06 * bytes[j] / mintime[j];

		rmstime[j] = sqrt(rmstime[j] / (double) NTIMES);
		Vprintf("%s%11.4f %11.4f %11.4f %11.4f\n", label[j],
			speed,
			rmstime[j], mintime[j], maxtime[j]);
		if (res)
			res[j] = speed;
	}
}
# define M 20
/*
 * Estimate the clock granularity in microseconds: collect M timestamps,
 * each at least one microsecond after the previous reading, and return the
 * smallest gap between neighbouring timestamps (negative gaps count as 0,
 * and the result never exceeds 1000000).
 */
int checktick(void)
{
	double stamps[M];
	double before, now;
	int idx, gap;
	int smallest = 1000000;

	/* Collect a sequence of M unique time values from the system. */
	for (idx = 0; idx < M; idx++) {
		before = mysecond();
		do {
			now = mysecond();
		} while ((now - before) < 1.0E-6);
		stamps[idx] = now;
	}

	/* The minimum difference between neighbouring values is the
	   estimate of the clock granularity. */
	for (idx = 1; idx < M; idx++) {
		gap = (int) (1.0E6 * (stamps[idx] - stamps[idx - 1]));
		if (gap < 0)
			gap = 0;
		if (gap < smallest)
			smallest = gap;
	}
	return smallest;
}
/* Size the benchmark for a memory block of `size` bytes: N becomes the
   number of doubles that fit into each of the three arrays. */
void stream_setmem(unsigned long size)
{
	const unsigned long usable = size - OFFSET;

	N = usable / (3*sizeof(double));
}
/* Number of bytes needed for the three work arrays of N+OFFSET doubles
   each. */
long stream_memsize(void)
{
	const size_t per_array = sizeof(double) * (N+OFFSET);

	return 3 * per_array;
}
/*
 * Prepare a benchmark run on the memory block `mem`: reset the timing
 * statistics, record how many bytes each kernel moves, carve the three
 * arrays out of `mem` and run stream_check(). Always returns 0.
 * `mem` is used as three consecutive arrays of N+OFFSET doubles.
 */
long stream_init(void *mem)
{
	const size_t array_bytes = sizeof(double) * N;
	double *base = mem;
	int kernel;

	for (kernel = 0; kernel < 4; kernel++) {
		rmstime[kernel] = 0;
		maxtime[kernel] = 0;
		mintime[kernel] = FLT_MAX;
	}

	/* The first two kernels touch two arrays per element, the last two
	   touch three. */
	bytes[0] = bytes[1] = 2 * array_bytes;
	bytes[2] = bytes[3] = 3 * array_bytes;

	a = base;
	b = base + (N+OFFSET);
	c = base + 2*(N+OFFSET);
	stream_check();
	return 0;
}
0707010000002F000081A400003EA6000000320000000161272043000000EA000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/stream_lib.hlong stream_memsize(void);
long stream_init(void *mem);
#define STREAM_NRESULTS 4
void stream_test(double *res);
void stream_check(void);
void stream_setmem(unsigned long size);
extern int stream_verbose;
extern char *stream_names[];
07070100000030000081A400003EA60000003200000001612720430000033C000000000000000000000000000000000000002900000000numactl-2.0.14.20.g4ee5e0c/stream_main.c#include <stdio.h>
#include <sys/mman.h>
#include <stdlib.h>
#include "numa.h"
#include "numaif.h"
#include "util.h"
#include "stream_lib.h"
/* Terminate the program with exit status 1; no usage text is printed. */
void usage(void)
{
	const int failure_status = 1;

	exit(failure_status);
}
char *policy = "default";
/* Run STREAM with a numa policy */
/*
 * Run STREAM with a numa policy.
 * Usage: <program> [policy [nodes]]
 * The optional first argument names the memory policy and the optional
 * second one the node string it applies to. The benchmark memory is mapped
 * anonymously, bound with mbind() and then handed to the STREAM library.
 */
int main(int ac, char **av)
{
	struct bitmask *nodes;
	char *map;
	long size;
	int policy;
	/* Only look at arguments that were really passed: av[2] lies beyond
	   the argv terminator when the program is run without arguments. */
	char *polname = (ac > 1) ? av[1] : NULL;
	char *nodestr = (ac > 2) ? av[2] : NULL;

	policy = parse_policy(polname, nodestr);

	if (nodestr) {
		nodes = numa_parse_nodestring(nodestr);
		if (!nodes) {
			printf ("<%s> is invalid\n", nodestr);
			exit(1);
		}
	} else {
		/* Allocate the empty mask only when it is actually used, so
		   it is not leaked by a later reassignment. */
		nodes = numa_allocate_nodemask();
		if (!nodes)
			exit(1);
	}

	size = stream_memsize();
	/* Anonymous mappings take fd -1; some implementations require it. */
	map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
		   -1, 0);
	if (map == (char*)-1)
		exit(1);
	if (mbind(map, size, policy, nodes->maskp, nodes->size, 0) < 0) {
		perror("mbind");
		exit(1);
	}
	stream_init(map);
	stream_test(NULL);
	return 0;
}
07070100000031000081A400003EA600000032000000016127204300001D54000000000000000000000000000000000000002500000000numactl-2.0.14.20.g4ee5e0c/syscall.c/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
libnuma is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; version
2.1.
libnuma is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should find a copy of v2.1 of the GNU Lesser General Public License
somewhere on your Linux system; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <unistd.h>
#include <sys/types.h>
#include <asm/unistd.h>
#include <errno.h>
#include "numa.h"
#include "numaif.h"
#include "numaint.h"
#include "config.h"
#include "util.h"
#define WEAK __attribute__((weak))
#if !defined(__NR_mbind) || !defined(__NR_set_mempolicy) || \
!defined(__NR_get_mempolicy) || !defined(__NR_migrate_pages) || \
!defined(__NR_move_pages)
#if defined(__x86_64__)
#define __NR_sched_setaffinity 203
#define __NR_sched_getaffinity 204
/* Official allocation */
#define __NR_mbind 237
#define __NR_set_mempolicy 238
#define __NR_get_mempolicy 239
#define __NR_migrate_pages 256
#define __NR_move_pages 279
#elif defined(__ia64__)
#define __NR_sched_setaffinity 1231
#define __NR_sched_getaffinity 1232
#define __NR_migrate_pages 1280
#define __NR_move_pages 1276
/* Official allocation */
#define __NR_mbind 1259
#define __NR_get_mempolicy 1260
#define __NR_set_mempolicy 1261
#elif defined(__i386__)
#define __NR_mbind 274
#define __NR_get_mempolicy 275
#define __NR_set_mempolicy 276
#define __NR_migrate_pages 294
#define __NR_move_pages 317
#elif defined(__powerpc__)
#define __NR_mbind 259
#define __NR_get_mempolicy 260
#define __NR_set_mempolicy 261
#define __NR_migrate_pages 258
/* FIXME: powerpc is missing move pages!!!
#define __NR_move_pages xxx
*/
#elif defined(__mips__)
#if _MIPS_SIM == _ABIO32
/*
* Linux o32 style syscalls are in the range from 4000 to 4999.
*/
#define __NR_Linux 4000
#define __NR_mbind (__NR_Linux + 268)
#define __NR_get_mempolicy (__NR_Linux + 269)
#define __NR_set_mempolicy (__NR_Linux + 270)
#define __NR_migrate_pages (__NR_Linux + 287)
#endif
#if _MIPS_SIM == _ABI64
/*
* Linux 64-bit syscalls are in the range from 5000 to 5999.
*/
#define __NR_Linux 5000
#define __NR_mbind (__NR_Linux + 227)
#define __NR_get_mempolicy (__NR_Linux + 228)
#define __NR_set_mempolicy (__NR_Linux + 229)
#define __NR_migrate_pages (__NR_Linux + 246)
#endif
#if _MIPS_SIM == _ABIN32
/*
* Linux N32 syscalls are in the range from 6000 to 6999.
*/
#define __NR_Linux 6000
#define __NR_mbind (__NR_Linux + 231)
#define __NR_get_mempolicy (__NR_Linux + 232)
#define __NR_set_mempolicy (__NR_Linux + 233)
#define __NR_migrate_pages (__NR_Linux + 250)
#endif
#elif defined(__hppa__)
#define __NR_migrate_pages 272
#elif defined(__arm__)
/* https://bugs.debian.org/796802 */
#warning "ARM does not implement the migrate_pages() syscall"
#elif defined(__s390x__)
#define __NR_mbind 268
#define __NR_get_mempolicy 269
#define __NR_set_mempolicy 270
#define __NR_migrate_pages 287
#define __NR_move_pages 310
#elif !defined(DEPS_RUN)
#error "Add syscalls for your architecture or update kernel headers"
#endif
#endif
#ifndef __GLIBC_PREREQ
# define __GLIBC_PREREQ(x,y) 0
#endif
#if defined(__GLIBC__) && __GLIBC_PREREQ(2, 11)
/* glibc 2.11 seems to have a working 6-argument syscall(). Use the
glibc supplied syscall in this case.
The version cut-off is rather arbitrary and could be probably
earlier. */
#define syscall6 syscall
#elif defined(__x86_64__)
/* 6 argument calls on x86-64 are often buggy in both glibc and
asm/unistd.h. Add a working version here. */
/*
 * Issue system call number `call` with six arguments using the x86-64
 * `syscall` instruction directly.
 * Returns the kernel's result when it is non-negative; a negative result
 * is turned into a return value of -1 with errno set to its negation.
 */
long syscall6(long call, long a, long b, long c, long d, long e, long f)
{
long res;
/* The template copies d, e and f into r10, r8 and r9 and then executes
   `syscall`. */
asm volatile ("movq %[d],%%r10 ; movq %[e],%%r8 ; movq %[f],%%r9 ; syscall"
/* Output: the result is read from the "a" register (rax). */
: "=a" (res)
/* Inputs: `call` shares the output's register ("0"); a, b and c are placed
   in the "D", "S" and "d" registers (rdi, rsi, rdx). */
: "0" (call),"D" (a),"S" (b), "d" (c),
/* d, e and f may live in any general operand ("g"). */
[d] "g" (d), [e] "g" (e), [f] "g" (f) :
/* Clobbers: the registers written by the template, plus r11, rcx and
   memory. */
"r11","rcx","r8","r10","r9","memory" );
/* A negative result is reported errno-style. */
if (res < 0) {
errno = -res;
res = -1;
}
return res;
}
#elif defined(__i386__)
/* i386 has buggy syscall6 in glibc too. This is tricky to do
in inline assembly because it clobbers so many registers. Do it
out of line. */
/*
 * Out-of-line i386 helper __syscall6(n, a, b, c, d, e, f): loads the seven
 * stack arguments into eax, ebx, ecx, edx, esi, edi and ebp, enters the
 * kernel with `int $0x80` and returns whatever the kernel left in eax.
 */
asm(
"__syscall6:\n"
/* Save the four registers that are overwritten with arguments below. */
" pushl %ebp\n"
" pushl %edi\n"
" pushl %esi\n"
" pushl %ebx\n"
/* Argument k is read from (k+5)*4(%esp): five words lie below the first
   argument, i.e. the four registers pushed above plus one more word
   (presumably the return address pushed by the call). */
" movl (0+5)*4(%esp),%eax\n"
" movl (1+5)*4(%esp),%ebx\n"
" movl (2+5)*4(%esp),%ecx\n"
" movl (3+5)*4(%esp),%edx\n"
" movl (4+5)*4(%esp),%esi\n"
" movl (5+5)*4(%esp),%edi\n"
" movl (6+5)*4(%esp),%ebp\n"
" int $0x80\n"
/* Restore the saved registers in reverse order of the pushes. */
" popl %ebx\n"
" popl %esi\n"
" popl %edi\n"
" popl %ebp\n"
" ret"
);
/* C prototype for the assembly routine above. */
extern long __syscall6(long n, long a, long b, long c, long d, long e, long f);
/*
 * Six-argument system call for i386, built on the __syscall6 assembly
 * helper. A negative raw result becomes a return value of -1 with errno
 * set to its negation; any other result is returned as is.
 */
long syscall6(long call, long a, long b, long c, long d, long e, long f)
{
	const long raw = __syscall6(call,a,b,c,d,e,f);

	if (raw >= 0)
		return raw;
	errno = -raw;
	return -1;
}
#else
#define syscall6 syscall
#endif
/* Weak wrapper that forwards its arguments unchanged to the get_mempolicy
   system call and returns the result of syscall(). */
long WEAK get_mempolicy(int *policy, unsigned long *nmask,
			unsigned long maxnode, void *addr,
			unsigned flags)
{
	long rc;

	rc = syscall(__NR_get_mempolicy, policy, nmask,
		     maxnode, addr, flags);
	return rc;
}
/* Weak wrapper around the mbind system call. It goes through syscall6
   because all six argument slots are used; the two pointers are passed
   as longs. */
long WEAK mbind(void *start, unsigned long len, int mode,
	const unsigned long *nmask, unsigned long maxnode, unsigned flags)
{
	long rc;

	rc = syscall6(__NR_mbind, (long)start, len, mode, (long)nmask,
		      maxnode, flags);
	return rc;
}
/* Weak wrapper that forwards its arguments unchanged to the set_mempolicy
   system call and returns the result of syscall(). */
long WEAK set_mempolicy(int mode, const unsigned long *nmask,
			unsigned long maxnode)
{
	return syscall(__NR_set_mempolicy,mode,nmask,maxnode);
}
/* Weak wrapper around the migrate_pages system call. When no syscall
   number is known for the architecture it fails with errno = ENOSYS and
   returns -1. */
long WEAK migrate_pages(int pid, unsigned long maxnode,
	const unsigned long *frommask, const unsigned long *tomask)
{
#if !defined(__NR_migrate_pages)
	errno = ENOSYS;
	return -1;
#else
	long rc;

	rc = syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
	return rc;
#endif
}
/* Weak wrapper around the move_pages system call. The fallback table
   above does not define __NR_move_pages for every architecture, so, like
   migrate_pages(), this fails with errno = ENOSYS and returns -1 when no
   syscall number is available instead of breaking the build. */
long WEAK move_pages(int pid, unsigned long count,
	void **pages, const int *nodes, int *status, int flags)
{
#if defined(__NR_move_pages)
	return syscall(__NR_move_pages, pid, count, pages, nodes, status, flags);
#else
	errno = ENOSYS;
	return -1;
#endif
}
/* SLES8 glibc doesn't define those */
/* Version 1.1 entry point: raw sched_setaffinity system call on a mask of
   len bytes. */
SYMVER("numa_sched_setaffinity_v1", "numa_sched_setaffinity@libnuma_1.1")
int numa_sched_setaffinity_v1(pid_t pid, unsigned len, const unsigned long *mask)
{
	const long rc = syscall(__NR_sched_setaffinity,pid,len,mask);

	return rc;
}
/* Version 1.2 entry point: sched_setaffinity system call on a struct
   bitmask; the length in bytes comes from numa_bitmask_nbytes(). */
SYMVER("numa_sched_setaffinity_v2", "numa_sched_setaffinity@@libnuma_1.2")
int numa_sched_setaffinity_v2(pid_t pid, struct bitmask *mask)
{
	const long rc = syscall(__NR_sched_setaffinity, pid,
				numa_bitmask_nbytes(mask), mask->maskp);

	return rc;
}
/* Version 1.1 entry point: raw sched_getaffinity system call on a mask of
   len bytes. */
SYMVER("numa_sched_getaffinity_v1", "numa_sched_getaffinity@libnuma_1.1")
int numa_sched_getaffinity_v1(pid_t pid, unsigned len, const unsigned long *mask)
{
	const long rc = syscall(__NR_sched_getaffinity,pid,len,mask);

	return rc;
}
/* Version 1.2 entry point: sched_getaffinity system call on a struct
   bitmask. The length passed to the kernel is in bytes; the call returns
   sizeof(cpumask_t). */
SYMVER("numa_sched_getaffinity_v2", "numa_sched_getaffinity@@libnuma_1.2")
int numa_sched_getaffinity_v2(pid_t pid, struct bitmask *mask)
{
	const long rc = syscall(__NR_sched_getaffinity, pid,
				numa_bitmask_nbytes(mask), mask->maskp);

	return rc;
}
make_internal_alias(numa_sched_getaffinity_v1);
make_internal_alias(numa_sched_getaffinity_v2);
make_internal_alias(numa_sched_setaffinity_v1);
make_internal_alias(numa_sched_setaffinity_v2);
07070100000032000081A400003EA6000000320000000161272043000004B6000000000000000000000000000000000000002300000000numactl-2.0.14.20.g4ee5e0c/sysfs.c/* Utility functions for reading sysfs values */
#define _GNU_SOURCE 1
#include <stdio.h>
#include <sys/fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>
#include <ctype.h>
#include "numa.h"
#include "numaint.h"
#define SYSFS_BLOCK 4096
/*
 * Read the beginning of a file into a freshly allocated buffer.
 *
 * name: path of the file to read.
 *
 * Returns a NUL-terminated malloc()ed string holding at most
 * SYSFS_BLOCK - 1 bytes of the file (the caller must free() it), or NULL
 * when the allocation fails, the file cannot be opened, or nothing could
 * be read.
 */
hidden char *sysfs_read(char *name)
{
	char *buf;
	int n;
	int fd;

	buf = malloc(SYSFS_BLOCK);
	if (!buf)
		return NULL;
	fd = open(name, O_RDONLY);
	if (fd < 0) {
		/* Never hand an invalid descriptor to read()/close(). */
		free(buf);
		return NULL;
	}
	/* Leave one byte for the terminating NUL. */
	n = read(fd, buf, SYSFS_BLOCK - 1);
	close(fd);
	if (n <= 0) {
		free(buf);
		return NULL;
	}
	buf[n] = 0;
	return buf;
}
/*
 * Read a list of node numbers from a file and set the matching bits in mask.
 *
 * mask: bitmask that receives one set bit per number parsed.
 * fmt, ...: printf-style format and arguments that build the file name.
 *
 * The file content is parsed as numbers separated by whitespace and/or
 * commas.  Returns 0 on success, -2 if a negative number is found, and -1
 * on any other failure (file name could not be built, file unreadable, no
 * number found, or a number not below numa_num_task_nodes()).  Bits set
 * before a failure was detected remain set.
 */
hidden int sysfs_node_read(struct bitmask *mask, char *fmt, ...)
{
	int n, ret = 0;
	va_list ap;
	char *p, *fn, *m, *end;
	long num;	/* keep the full strtol() range: no silent truncation */

	va_start(ap, fmt);
	n = vasprintf(&fn, fmt, ap);
	va_end(ap);
	if (n < 0)
		return -1;
	p = sysfs_read(fn);
	free(fn);
	if (!p)
		return -1;

	m = p;
	do {
		num = strtol(m, &end, 0);
		if (m == end) {
			/* no digits consumed */
			ret = -1;
			goto out;
		}
		if (num < 0) {
			ret = -2;
			goto out;
		}
		if (num >= numa_num_task_nodes()) {
			ret = -1;
			goto out;
		}
		numa_bitmask_setbit(mask, num);

		/* Continuation not supported by kernel yet. */
		m = end;
		/* <ctype.h> functions require an unsigned char value */
		while (isspace((unsigned char)*m) || *m == ',')
			m++;
	} while (isdigit((unsigned char)*m));
out:
	free(p);
	return ret;
}
07070100000033000081A400003EA600000032000000016127204300000077000000000000000000000000000000000000002300000000numactl-2.0.14.20.g4ee5e0c/sysfs.hstruct bitmask;
/* Read the start of file `name` into a malloc()ed NUL-terminated string; NULL on failure. */
hidden char *sysfs_read(char *name);
/*
 * Build a file name from fmt and its arguments, parse the numbers stored in
 * that file and set the matching bits in mask.  Returns 0 on success and a
 * negative value on failure.
 */
hidden int sysfs_node_read(struct bitmask *mask, char *fmt, ...);
07070100000034000041ED00003EA600000032000000026127204300000000000000000000000000000000000000000000002000000000numactl-2.0.14.20.g4ee5e0c/test07070100000035000081A400003EA60000003200000001612720430000031A000000000000000000000000000000000000002700000000numactl-2.0.14.20.g4ee5e0c/test/README
Various simple test scripts to verify some parts of the NUMA API.
To do a full regression test, run: make test
You should have at least two nodes on a NUMA system for the test suite.
The tests in regress assume that there is enough memory free on nodes 0/1.
They consider PREFERRED/INTERLEAVE not hitting the first choice node an
error.
They also require a relatively idle machine to avoid too much
noise from memory allocation from other processes. Without
that regress1 might fail.
You can run the tests under valgrind with VALGRIND=valgrind make test
Older valgrind versions incorrectly report an uninitialized byte error
on set_mempolicy. That is a false positive.
TBD: more detailed unit tests for mbind / shm / {get,set}_mempolicy
Currently everything is tested using numactl only.
07070100000036000081ED00003EA6000000320000000161272043000009B4000000000000000000000000000000000000002B00000000numactl-2.0.14.20.g4ee5e0c/test/bind_range#!/bin/bash
# Exercise the --all/-a option, which switches off the default cpuset
# awareness of --cpunodebind, --physcpubind, --interleave, --preferred
# and --membind.
# NOTE: at least two nodes and two cpus are required.
testdir=$(dirname "$0")
: "${srcdir:=${testdir}/..}"
: "${builddir:=${srcdir}}"
PATH=${builddir}:$PATH
export PATH
export old_mask
# Run the check named by $1.  A return status of exactly 1 counts as a
# failure: the affinity mask is restored and the script exits with 1.
eval_test() {
	local status
	$1
	status=$?
	if [ "$status" == 1 ]; then
		echo -e "$1 FAILED!"
		reset_mask
		exit 1
	fi
	echo -e "$1 PASSED"
}
# --all must come before --physcpubind to take effect: the first call must
# not exit with status 1, the second one must not succeed.
check_arg_order() {
	local rc
	numactl --all --physcpubind=$HIGHESTCPU ls > /dev/null 2>&1
	rc=$?
	[ "$rc" == 1 ] && return 1
	if numactl --physcpubind=$HIGHESTCPU --all ls > /dev/null 2>&1; then
		return 1
	fi
	return 0
}
# With the shell pinned to cpu 0, binding to the highest cpu must be
# refused unless --all is given.
check_physcpubind() {
	local rc
	reset_mask
	set_cpu_affinity 0
	# shouldn't pass so easy
	if numactl --physcpubind=$HIGHESTCPU ls > /dev/null 2>&1; then
		return 1
	fi
	# shouldn't fail
	numactl --all --physcpubind=$HIGHESTCPU ls > /dev/null 2>&1
	rc=$?
	[ "$rc" == 1 ] && return 1
	return 0
}
# With the shell pinned to the cpus of the lowest node, --cpunodebind to the
# highest node is expected not to exit with status 1, with or without --all.
check_cpunodebind() {
	local low_cpu_range high_cpu rc
	reset_mask
	low_cpu_range=$(cat /sys/devices/system/node/node$LOWESTNODE/cpulist)
	set_cpu_affinity $low_cpu_range
	# should pass
	numactl --cpunodebind=$HIGHESTNODE ls > /dev/null 2>&1
	rc=$?
	[ "$rc" == 1 ] && return 1
	# should pass for sure
	numactl --all --cpunodebind=$HIGHESTNODE ls > /dev/null 2>&1
	rc=$?
	[ "$rc" == 1 ] && return 1
	return 0
}
# Pin this shell ($$) to the cpu list given in $1.
set_cpu_affinity() {
	taskset -p -c $1 $$ >/dev/null
}
# Remember the current affinity mask of this shell in old_mask: take the
# text after the colon in the taskset output and strip leading blanks.
get_mask() {
	old_mask=$(taskset -p $$ | cut -d: -f2 | sed -e 's/^[ \t]*//')
}
# Restore the affinity mask of this shell to the value saved in old_mask.
reset_mask() {
	taskset -p $old_mask $$ >/dev/null
}
# Determine the highest cpu number from the last 'processor' line of
# /proc/cpuinfo.  s390x gets its own extraction pipeline
# (NOTE(review): presumably because its cpuinfo layout differs -- confirm).
ARCH=`uname -m`
if [ ${ARCH} != "s390x" ]; then
HIGHESTCPU=$(grep 'processor' /proc/cpuinfo | tail -n1 | cut -f2 -d':')
else
HIGHESTCPU=$(grep 'processor' /proc/cpuinfo | tail -n1 | cut -f2 | sed 's/://' )
fi
# Keep only the second space-separated field of the value obtained above.
HIGHESTCPU=$(echo $HIGHESTCPU | cut -f2 -d' ')
# From 'numactl -H', select nodes whose "cpus:" list starts with a digit and
# whose "size:" value starts with a non-zero digit; the last such entry gives
# HIGHESTNODE and the first one LOWESTNODE (second field = node number).
HIGHESTNODE=$(numactl -H | grep -Pzo 'node [0-9]* cpus: [0-9].*(.|\n)node [0-9]* size: [1-9].* MB' | tail -n1 | cut -f2 -d' ')
LOWESTNODE=$(numactl -H | grep -Pzo 'node [0-9]* cpus: [0-9].*(.|\n)node [0-9]* size: [1-9].* MB' | head -n1 | cut -f2 -d' ')
# Save the current affinity, run the three checks (each exits the script on
# failure), then restore the affinity.
get_mask
eval_test check_arg_order
eval_test check_physcpubind
eval_test check_cpunodebind
reset_mask
exit 0
07070100000037000081ED00003EA60000003200000001612720430000035A000000000000000000000000000000000000002E00000000numactl-2.0.14.20.g4ee5e0c/test/checkaffinity#!/bin/bash
# Check that affinity works: for every cpu and node listed by
# `numactl --show`, bind to it and verify that the binding took effect.
# Exits with status 1 on the first mismatch.
testdir=`dirname "$0"`
: ${srcdir:=${testdir}/..}
: ${builddir:=${srcdir}}
export PATH=${builddir}:$PATH

# Nodes and cpus the current process is allowed to run on.
S=`numactl --show | grep nodebind:`
NODES=`echo $S | sed -e "s/nodebind://"`
S=`numactl --show | grep physcpubind:`
CPUS=`echo $S | sed -e "s/physcpubind://"`

for i in $CPUS ; do
	# printcpu reports the cpu it is executing on
	if [ "$(numactl --physcpubind=$i "${testdir}"/printcpu)" != "$i" ] ; then
		echo "--physcpubind for $i doesn't work"
		exit 1
	fi
	if [ "$(numactl --physcpubind=$i numactl --show | awk '/^physcpubind/ { print $2 }' )" != "$i" ] ; then
		echo "--show doesn't agree with physcpubind for cpu $i"
		exit 1
	fi
done

for i in $NODES ; do
	# Both sides are quoted so that empty output is reported as a mismatch
	# instead of turning the test expression into a syntax error.
	if [ "$(numactl --cpunodebind=$i numactl --show | awk '/nodebind/ { print $2 }' )" != "$i" ] ; then
		echo "--show doesn't agree with cpunodebind for node $i"
		exit 1
	fi
done
07070100000038000081ED00003EA600000032000000016127204300000596000000000000000000000000000000000000002E00000000numactl-2.0.14.20.g4ee5e0c/test/checktopology#!/bin/bash
# check numactl --hardware output
# this checks most of the topology discovery in libnuma
# The script compares the node and cpu counts printed by numactl with
# /proc/cpuinfo and /sys/devices/system/node, then verifies every cpu that
# numactl lists for a node against the node's sysfs directory.
testdir=`dirname "$0"`
: ${srcdir:=${testdir}/..}
: ${builddir:=${srcdir}}
export PATH=${builddir}:$PATH
# Reference counts taken from the kernel interfaces.
numcpus=$(grep -c processor /proc/cpuinfo)
numnodes=$(ls -1d /sys/devices/system/node/node[0-9]* | wc -l)
# Counts as reported by numactl: words after "cpus:" and lines with "size".
nccpus=$(numactl --hardware | grep cpus | sed 's/node.*cpus://' | wc -w )
ncnodes=$(numactl --hardware | grep -c 'node.*size' )
node_has_cpus=""
if [ $numnodes != $ncnodes ] ; then
echo "numactl --hardware doesnt report all nodes"
exit 1
fi
# Fails only when the cpu counts differ AND nccpus divided by the node count
# also differs from numcpus.
if [ $numcpus != $nccpus -a \( $[$nccpus / $numnodes] != $numcpus \) ] ; then
echo "numactl --hardware cpus look bogus"
exit 1
fi
# If the has_cpu file exists and is non-empty, remember its content
# (with the first comma replaced by a space).
if [ -s /sys/devices/system/node/has_cpu ]; then
node_has_cpus=$(cat /sys/devices/system/node/has_cpu | sed 's/,/ /')
fi
# One iteration per "cpus" line of numactl --hardware.  The loop runs in a
# pipeline; since it is the last command, its 'exit 1' becomes the script's
# exit status.
numactl --hardware | grep cpus | while read n ; do
# Drop everything from " cpus" on, then remove the first space
node=${n/ cpus*/}
node=${node/ /}
# Keep only the cpu list that follows ": "
cpus=${n/*: /}
check_node=$(echo $node | sed 's/node//')
# Skip nodes whose number does not occur (substring match) in has_cpu
if [[ -n ${node_has_cpus} ]]; then
if ! [[ "${node_has_cpus}" == *"$check_node"* ]]; then
echo "Skipping cpu less $node"
continue
fi
fi
k=0
for i in $cpus ; do
# Each listed cpu must be a symlink inside the node's sysfs directory
if [ ! -h "/sys/devices/system/node/$node/cpu$i" ] ; then
echo "$node doesn't have cpu $i"
exit 1
fi
k=$[$k+1]
done
# k counts the cpus verified above; compare it with the length of the list
if [ $k != $(echo $cpus | wc -w) ] ; then
echo "$node missing cpu"
exit 1
fi
done
07070100000039000081A400003EA600000032000000016127204300000411000000000000000000000000000000000000002B00000000numactl-2.0.14.20.g4ee5e0c/test/distance.c/* Test numa_distance */
#include <numa.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Print the distance matrix of all nodes present in numa_nodes_ptr and
 * verify that every self distance is 10 and that the matrix is symmetric.
 * Returns 0 on success; exits with status 1 on any failure.
 */
int main(void)
{
	int maxnode, a, b, got_nodes = 0;
	int *node_to_use;

	if (numa_available() < 0) {
		printf("no numa support in kernel\n");
		exit(1);
	}
	maxnode = numa_max_node();
	/* The loop below visits nodes 0..maxnode inclusive: maxnode + 1 slots. */
	node_to_use = malloc((size_t)(maxnode + 1) * sizeof(int));
	if (!node_to_use) {
		perror("malloc");
		exit(1);
	}
	for (a = 0; a <= maxnode; a++) {
		if (numa_bitmask_isbitset(numa_nodes_ptr, a)) {
			node_to_use[got_nodes++] = a;
		}
	}
	for (a = 0; a < got_nodes; a++) {
		printf("%03d: ", node_to_use[a]);
		if (numa_distance(node_to_use[a], node_to_use[a]) != 10) {
			printf("%d: self distance is not 10 (%d)\n",
			       node_to_use[a], numa_distance(node_to_use[a],node_to_use[a]));
			exit(1);
		}
		for (b = 0; b < got_nodes; b++) {
			int d1 = numa_distance(node_to_use[a], node_to_use[b]);
			int d2 = numa_distance(node_to_use[b], node_to_use[a]);
			printf("%03d ", d1);
			if (d1 != d2) {
				printf("\n(%d,%d)->(%d,%d) wrong!\n",node_to_use[a],node_to_use[b],d1,d2);
				exit(1);
			}
		}
		printf("\n");
	}
	free(node_to_use);
	return 0;
}
0707010000003A000081A400003EA600000032000000016127204300000084000000000000000000000000000000000000002700000000numactl-2.0.14.20.g4ee5e0c/test/ftok.c#include <sys/ipc.h>
#include <stdio.h>
/* Print, in hex, the ftok() key (project id 0) of every path given on the command line. */
int main(int ac, char **av)
{
	char **arg;

	for (arg = av + 1; *arg; arg++)
		printf("0x%x\n", ftok(*arg, 0));
	return 0;
}
0707010000003B000081A400003EA600000032000000016127204300000246000000000000000000000000000000000000002E00000000numactl-2.0.14.20.g4ee5e0c/test/getnodemask.c#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <numa.h>
/*
 * Print the result of numa_available(), the state of the first four bits of
 * the current run node mask, and the result of re-applying that mask.
 */
int main(int argc, char *argv[])
{
	nodemask_t nodemask;
	int node;
	int ret = numa_available();

	printf("numa_available returns %d\n", ret);
	if (ret < 0)
		exit(1);

	nodemask_zero(&nodemask);
	nodemask = numa_get_run_node_mask();
	node = 0;
	while (node < 4) {
		printf("numa_get_run_node_mask nodemask_isset returns=0x%lx\n", nodemask_isset(&nodemask, node));
		node++;
	}

	ret = numa_run_on_node_mask(&nodemask);
	printf("rc=%d from numa_run_on_node_mask\n", ret);
	return (0);
}
0707010000003C000081A400003EA600000032000000016127204300000BF7000000000000000000000000000000000000003200000000numactl-2.0.14.20.g4ee5e0c/test/mbind_mig_pages.c/*
* Test program to test the moving of pages using mbind.
*
* (C) 2006 Silicon Graphics, Inc.
* Christoph Lameter <clameter@sgi.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <numa.h>
#include <numaif.h>
#include <unistd.h>
#include <asm/unistd.h>
/* Page size; set from getpagesize() in main(). */
unsigned int pagesize;
/* Number of pages exercised; argv[1] may override the default. */
unsigned int page_count = 32;
/* Raw malloc()ed buffer that backs the test pages. */
char *page_base;
/* Page-aligned start of the test pages inside page_base. */
char *pages;
/* Per-page address array handed to numa_move_pages(). */
void **addr;
/* Per-page status array filled in by numa_move_pages(). */
int *status;
/* Per-page target node array handed to numa_move_pages(). */
int *nodes;
/* Number of failures detected by main(). */
int errors;
/* numa_max_node() + 1. */
int nr_nodes;
/* Node masks used for the two mbind() calls: bit 0 set / bit 1 set. */
struct bitmask *old_nodes;
struct bitmask *new_nodes;
/*
 * Test moving pages with mbind().
 *
 * Touches page_count pages (page 2 is left untouched), places them on node 0
 * with numa_move_pages(), then binds the range to node 0 and afterwards to
 * node 1 using MPOL_MF_MOVE | MPOL_MF_STRICT.  Finally verifies that the page
 * contents are intact and that every touched page is on node 1.
 *
 * argv[1] (optional): number of pages to use.
 * Returns 0 if no error was counted, 1 otherwise.
 */
int main(int argc, char **argv)
{
	int i, rc;

	pagesize = getpagesize();
	nr_nodes = numa_max_node()+1;
	old_nodes = numa_bitmask_alloc(nr_nodes);
	new_nodes = numa_bitmask_alloc(nr_nodes);
	numa_bitmask_setbit(old_nodes, 0);
	numa_bitmask_setbit(new_nodes, 1);
	if (nr_nodes < 2) {
		printf("A minimum of 2 nodes is required for this test.\n");
		exit(1);
	}
	setbuf(stdout, NULL);
	printf("mbind migration test ......\n");
	if (argc > 1)
		sscanf(argv[1], "%u", &page_count);

	/*
	 * One extra page of slack: the aligned start computed below can lie
	 * up to a full page behind page_base.
	 */
	page_base = malloc((size_t)(page_count + 1) * pagesize);
	addr = malloc(sizeof(*addr) * page_count);
	status = malloc(sizeof(*status) * page_count);
	nodes = malloc(sizeof(*nodes) * page_count);
	if (!page_base || !addr || !status || !nodes) {
		printf("Unable to allocate memory\n");
		exit(1);
	}
	/* Round up to the next page boundary after page_base. */
	pages = (void *) ((((long)page_base) & ~((long)(pagesize - 1))) + pagesize);
	for (i = 0; i < page_count; i++) {
		if (i != 2)
			/* We leave page 2 unallocated */
			pages[ i * pagesize ] = (char) i;
		addr[i] = pages + i * pagesize;
		nodes[i] = 0;
		status[i] = -123;
	}

	/* Move pages to node zero */
	numa_move_pages(0, page_count, addr, nodes, status, 0);

	printf("\nPage status before page migration\n");
	printf("---------------------------------\n");
	/* A NULL node array only queries the current location of each page. */
	rc = numa_move_pages(0, page_count, addr, NULL, status, 0);
	if (rc < 0) {
		perror("move_pages");
		exit(1);
	}
	for (i = 0; i < page_count; i++) {
		printf("Page %d vaddr=%p node=%d\n", i, (void *)(pages + i * pagesize), status[i]);
		if (i != 2 && status[i]) {
			printf("Bad page state. Page %d status %d\n",i, status[i]);
			exit(1);
		}
	}

	/* Move to node zero */
	printf("\nMoving pages via mbind to node 0 ...\n");
	rc = mbind(pages, page_count * pagesize, MPOL_BIND, old_nodes->maskp,
		   old_nodes->size + 1, MPOL_MF_MOVE | MPOL_MF_STRICT);
	if (rc < 0) {
		perror("mbind");
		errors++;
	}

	printf("\nMoving pages via mbind from node 0 to 1 ...\n");
	rc = mbind(pages, page_count * pagesize, MPOL_BIND, new_nodes->maskp,
		   new_nodes->size + 1, MPOL_MF_MOVE | MPOL_MF_STRICT);
	if (rc < 0) {
		perror("mbind");
		errors++;
	}

	/* Query the final location of each page and verify it. */
	numa_move_pages(0, page_count, addr, NULL, status, 0);
	for (i = 0; i < page_count; i++) {
		printf("Page %d vaddr=%lx node=%d\n", i,
		       (unsigned long)(pages + i * pagesize), status[i]);
		if (i != 2) {
			if (pages[ i* pagesize ] != (char) i) {
				printf("*** Page content corrupted.\n");
				errors++;
			} else if (status[i] != 1) {
				printf("*** Page on wrong node.\n");
				errors++;
			}
		}
	}

	if (!errors)
		printf("Test successful.\n");
	else
		printf("%d errors.\n", errors);
	return errors > 0 ? 1 : 0;
}
0707010000003D000081A400003EA600000032000000016127204300000B97000000000000000000000000000000000000003000000000numactl-2.0.14.20.g4ee5e0c/test/migrate_pages.c/*
* Test program to test the moving of a processes pages.
*
* (C) 2006 Silicon Graphics, Inc.
* Christoph Lameter <clameter@sgi.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <numa.h>
#include <unistd.h>
#include <errno.h>
/* Page size; set from getpagesize() in main(). */
unsigned int pagesize;
/* Number of pages exercised; argv[1] may override the default. */
unsigned int page_count = 32;
/* Raw malloc()ed buffer that backs the test pages. */
char *page_base;
/* Page-aligned start of the test pages inside page_base. */
char *pages;
/* Per-page address array handed to numa_move_pages(). */
void **addr;
/* Per-page status array filled in by numa_move_pages(). */
int *status;
/* Per-page target node array handed to numa_move_pages(). */
int *nodes;
/* Number of failures detected by main(). */
int errors;
/* numa_max_node() + 1. */
int nr_nodes;
/* Source (bit 1 set) and destination (bit 0 set) masks for numa_migrate_pages(). */
struct bitmask *old_nodes;
struct bitmask *new_nodes;
/*
 * Test migrating the pages of the current process.
 *
 * Touches page_count pages (page 2 is left untouched), places them on node 1
 * with numa_move_pages(), then migrates the process from node 1 to node 0
 * with numa_migrate_pages() and verifies that the page contents are intact
 * and that every touched page ended up on node 0.
 *
 * argv[1] (optional): number of pages to use.
 * Returns 0 if no error was counted, 1 otherwise.
 */
int main(int argc, char **argv)
{
	int i, rc;

	pagesize = getpagesize();
	nr_nodes = numa_max_node()+1;
	old_nodes = numa_bitmask_alloc(nr_nodes);
	new_nodes = numa_bitmask_alloc(nr_nodes);
	numa_bitmask_setbit(old_nodes, 1);
	numa_bitmask_setbit(new_nodes, 0);
	if (nr_nodes < 2) {
		printf("A minimum of 2 nodes is required for this test.\n");
		exit(1);
	}
	setbuf(stdout, NULL);
	printf("migrate_pages() test ......\n");
	if (argc > 1)
		sscanf(argv[1], "%u", &page_count);

	/*
	 * One extra page of slack: the aligned start computed below can lie
	 * up to a full page behind page_base.
	 */
	page_base = malloc((size_t)(page_count + 1) * pagesize);
	addr = malloc(sizeof(*addr) * page_count);
	status = malloc(sizeof(*status) * page_count);
	nodes = malloc(sizeof(*nodes) * page_count);
	if (!page_base || !addr || !status || !nodes) {
		printf("Unable to allocate memory\n");
		exit(1);
	}
	/* Round up to the next page boundary after page_base. */
	pages = (void *) ((((long)page_base) & ~((long)(pagesize - 1))) + pagesize);
	for (i = 0; i < page_count; i++) {
		if (i != 2)
			/* We leave page 2 unallocated */
			pages[ i * pagesize ] = (char) i;
		addr[i] = pages + i * pagesize;
		nodes[i] = 1;
		status[i] = -123;
	}

	/* Move to starting node */
	rc = numa_move_pages(0, page_count, addr, nodes, status, 0);
	if (rc < 0 && errno != ENOENT) {
		perror("move_pages");
		exit(1);
	}

	/* Verify correct startup locations */
	printf("Page location at the beginning of the test\n");
	printf("------------------------------------------\n");
	/* A NULL node array only queries the current location of each page. */
	numa_move_pages(0, page_count, addr, NULL, status, 0);
	for (i = 0; i < page_count; i++) {
		printf("Page %d vaddr=%p node=%d\n", i, (void *)(pages + i * pagesize), status[i]);
		if (i != 2 && status[i] != 1) {
			printf("Bad page state before migrate_pages. Page %d status %d\n",i, status[i]);
			exit(1);
		}
	}

	/* Request placement on the starting node (node 1) once more */
	numa_move_pages(0, page_count, addr, nodes, status, 0);

	printf("\nMigrating the current processes pages ...\n");
	rc = numa_migrate_pages(0, old_nodes, new_nodes);
	if (rc < 0) {
		perror("numa_migrate_pages failed");
		errors++;
	}

	/* Get page state after migration */
	numa_move_pages(0, page_count, addr, NULL, status, 0);
	for (i = 0; i < page_count; i++) {
		printf("Page %d vaddr=%lx node=%d\n", i,
		       (unsigned long)(pages + i * pagesize), status[i]);
		if (i != 2) {
			if (pages[ i* pagesize ] != (char) i) {
				printf("*** Page contents corrupted.\n");
				errors++;
			} else if (status[i]) {
				printf("*** Page on the wrong node\n");
				errors++;
			}
		}
	}

	if (!errors)
		printf("Test successful.\n");
	else
		printf("%d errors.\n", errors);
	return errors > 0 ? 1 : 0;
}
0707010000003E000081A400003EA600000032000000016127204300000B36000000000000000000000000000000000000002D00000000numactl-2.0.14.20.g4ee5e0c/test/move_pages.c/*
* Test program to test the moving of individual pages in a process.
*
* (C) 2006 Silicon Graphics, Inc.
* Christoph Lameter <clameter@sgi.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include "numa.h"
#include <unistd.h>
#include <asm/unistd.h>
/* Page size; set from getpagesize() in main(). */
unsigned int pagesize;
/* Number of pages exercised; argv[1] may override the default. */
unsigned int page_count = 32;
/* Raw malloc()ed buffer that backs the test pages. */
char *page_base;
/* Page-aligned start of the test pages inside page_base. */
char *pages;
/* Per-page address array handed to numa_move_pages(). */
void **addr;
/* Per-page status array filled in by numa_move_pages(). */
int *status;
/* Per-page target node array handed to numa_move_pages(). */
int *nodes;
/* Number of failures detected by main(). */
int errors;
/* Value returned by get_node_list(). */
int nr_nodes;
/* Node ids collected by get_node_list(). */
int *node_to_use;
/*
 * Fill node_to_use with the ids of all nodes for which numa_node_size()
 * reports a size greater than zero.
 *
 * Returns the number of nodes collected, or -1 when that number differs
 * from numa_num_configured_nodes().  Exits with status 1 if the node list
 * cannot be allocated.
 */
int get_node_list()
{
	int a, got_nodes = 0, max_node, numnodes;
	long long free_node_sizes;

	numnodes = numa_num_configured_nodes();
	node_to_use = (int *)malloc(numnodes * sizeof(int));
	if (!node_to_use) {
		perror("malloc");
		exit(1);
	}
	max_node = numa_max_node();
	for (a = 0; a <= max_node; a++) {
		if (numa_node_size(a, &free_node_sizes) > 0) {
			/*
			 * More nodes with memory than configured nodes: this
			 * is a mismatch, and storing another id would write
			 * past the end of node_to_use.
			 */
			if (got_nodes >= numnodes)
				return -1;
			node_to_use[got_nodes++] = a;
		}
	}
	if(got_nodes != numnodes)
		return -1;
	return got_nodes;
}
/*
 * Test moving individual pages of the process.
 *
 * Touches page_count pages (page 2 is left untouched), spreads them
 * round-robin over the nodes returned by get_node_list() with
 * numa_move_pages(), and verifies page contents and reported placement.
 *
 * argv[1] (optional): number of pages to use.
 * Returns 0 if no error was counted, 1 otherwise.  Exits with 77 when fewer
 * than two nodes are usable and with 1 on a node-list mismatch.
 */
int main(int argc, char **argv)
{
	int i, rc;

	pagesize = getpagesize();
	nr_nodes = get_node_list();

	/* Must be tested before the "< 2" check, which would also match -1. */
	if (nr_nodes == -1) {
		printf("Mismatch between congfigured nodes and memory-rich nodes.\n");
		exit(1);
	}
	if (nr_nodes < 2) {
		printf("A minimum of 2 nodes is required for this test.\n");
		exit(77);
	}

	setbuf(stdout, NULL);
	printf("move_pages() test ......\n");
	if (argc > 1)
		sscanf(argv[1], "%u", &page_count);
	/* argv[1] is NULL without an argument; never pass NULL to %s */
	printf("pages=%d (%s)\n", page_count, argc > 1 ? argv[1] : "(null)");

	/*
	 * One extra page of slack: the aligned start computed below can lie
	 * up to a full page behind page_base.
	 */
	page_base = malloc((size_t)(page_count + 1) * pagesize);
	addr = malloc(sizeof(*addr) * page_count);
	status = malloc(sizeof(*status) * page_count);
	nodes = malloc(sizeof(*nodes) * page_count);
	if (!page_base || !addr || !status || !nodes) {
		printf("Unable to allocate memory\n");
		exit(1);
	}
	/* Round up to the next page boundary after page_base. */
	pages = (void *) ((((long)page_base) & ~((long)(pagesize - 1))) + pagesize);
	for (i = 0; i < page_count; i++) {
		if (i != 2)
			/* We leave page 2 unallocated */
			pages[ i * pagesize ] = (char) i;
		addr[i] = pages + i * pagesize;
		nodes[i] = node_to_use[(i % nr_nodes)];
		status[i] = -123;
	}

	printf("\nMoving pages to start node ...\n");
	/* A NULL node array only queries the current location of each page. */
	rc = numa_move_pages(0, page_count, addr, NULL, status, 0);
	if (rc < 0)
		perror("move_pages");
	for (i = 0; i < page_count; i++)
		printf("Page %d vaddr=%p node=%d\n", i, (void *)(pages + i * pagesize), status[i]);

	printf("\nMoving pages to target nodes ...\n");
	rc = numa_move_pages(0, page_count, addr, nodes, status, 0);
	if (rc < 0) {
		perror("move_pages");
		errors++;
	}
	for (i = 0; i < page_count; i++) {
		if (i != 2) {
			if (pages[ i* pagesize ] != (char) i)
				errors++;
			/* the reported node must be the requested one */
			else if (status[i] != nodes[i])
				errors++;
		}
	}
	for (i = 0; i < page_count; i++) {
		printf("Page %d vaddr=%lx node=%d\n", i,
		       (unsigned long)(pages + i * pagesize), status[i]);
	}

	if (!errors)
		printf("Test successful.\n");
	else
		printf("%d errors.\n", errors);
	return errors > 0 ? 1 : 0;
}
0707010000003F000081A400003EA60000003200000001612720430000010E000000000000000000000000000000000000002900000000numactl-2.0.14.20.g4ee5e0c/test/mynode.c#include <numa.h>
#include <numaif.h>
#include <stdio.h>
/*
 * Allocate a small buffer with numa_alloc(), touch it and print the node it
 * was placed on, as reported by get_mempolicy(MPOL_F_NODE|MPOL_F_ADDR).
 * Returns 1 if the allocation fails, 0 otherwise.
 */
int main(void)
{
	int nd;
	char *man = numa_alloc(1000);

	if (!man) {
		perror("numa_alloc");
		return 1;
	}
	/* Touch the memory so that a page is actually allocated. */
	*man = 1;
	if (get_mempolicy(&nd, NULL, 0, man, MPOL_F_NODE|MPOL_F_ADDR) < 0)
		perror("get_mempolicy");
	else
		printf("my node %d\n", nd);
	numa_free(man, 1000);
	return 0;
}
07070100000040000081A400003EA6000000320000000161272043000001B1000000000000000000000000000000000000002D00000000numactl-2.0.14.20.g4ee5e0c/test/node-parse.c/* Test wrapper for the nodemask parser */
#include <stdio.h>
#include "numa.h"
#include "util.h"
/*
 * For util.c. Fixme.
 * NOTE(review): presumably util.c calls usage() on errors and expects the
 * program to provide it -- confirm.  This test version just exits with 1.
 */
void usage(void)
{
exit(1);
}
/*
 * Parse every command line argument with numa_parse_nodestring() and print
 * the resulting mask.  Returns 1 if any argument failed to parse, else 0.
 */
int main(int ac, char **av)
{
	int err = 0;
	char **arg;

	for (arg = av + 1; *arg != NULL; arg++) {
		struct bitmask *mask = numa_parse_nodestring(*arg);

		if (mask == NULL) {
			printf("Failed to convert `%s'\n", *arg);
			err |= 1;
		} else {
			printmask("result", mask);
			numa_bitmask_free(mask);
		}
	}
	return err;
}
07070100000041000081A400003EA600000032000000016127204300000243000000000000000000000000000000000000002A00000000numactl-2.0.14.20.g4ee5e0c/test/nodemap.c#include "numa.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Print, for every node index 0..numa_num_configured_nodes()-1, the cpus
 * that numa_node_to_cpus() reports for it.
 * Exits with status 1 when NUMA is not available.
 */
int main(void)
{
	int i, k, w, ncpus;
	struct bitmask *cpus;
	int maxnode;

	/* numa_available() must be consulted before any other libnuma call. */
	if (numa_available() < 0) {
		printf("no numa\n");
		exit(1);
	}
	maxnode = numa_num_configured_nodes()-1;
	cpus = numa_allocate_cpumask();
	ncpus = cpus->size;

	for (i = 0; i <= maxnode ; i++) {
		if (numa_node_to_cpus(i, cpus) < 0) {
			printf("node %d failed to convert\n",i);
		}
		printf("%d: ", i);
		/* w is never incremented, so the separator is always empty */
		w = 0;
		for (k = 0; k < ncpus; k++)
			if (numa_bitmask_isbitset(cpus, k))
				printf(" %s%d", w>0?",":"", k);
		putchar('\n');
	}
	numa_bitmask_free(cpus);
	return 0;
}
07070100000042000081ED00003EA60000003200000001612720430000009B000000000000000000000000000000000000002900000000numactl-2.0.14.20.g4ee5e0c/test/numademo#!/bin/sh
# Run numademo from the build directory with the options "-t -e 10M".
testdir=$(dirname "$0")
: "${srcdir:=${testdir}/..}"
: "${builddir:=${srcdir}}"
PATH=${builddir}:$PATH
export PATH
exec "${builddir}"/numademo -t -e 10M
07070100000043000081A400003EA600000032000000016127204300000066000000000000000000000000000000000000002B00000000numactl-2.0.14.20.g4ee5e0c/test/pagesize.c#include <unistd.h>
#include <stdio.h>
/* Print the value of getpagesize() followed by a newline. */
int main(void)
{
	int size = getpagesize();

	printf("%d\n", size);
	return 0;
}
07070100000044000081A400003EA600000032000000016127204300000535000000000000000000000000000000000000002B00000000numactl-2.0.14.20.g4ee5e0c/test/prefered.c/* Test prefer policy */
#include "numa.h"
#include "numaif.h"
#include <sys/mman.h>
#include <stdio.h>
#include <assert.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#define err(x) perror(x),exit(1)
/*
 * Test the MPOL_PREFERRED policy.
 *
 * For every node from numa_max_node() down to 0: map anonymous memory, set
 * MPOL_PREFERRED for that node on the first page, touch the page, and check
 * through get_mempolicy() that the policy and node mask were stored and that
 * the page was allocated on the requested node.
 *
 * Returns 0 when every page landed on its preferred node, 1 otherwise;
 * exits with status 1 when a system call fails.
 */
int main(void)
{
	int max = numa_max_node();
	int maxmask = numa_num_possible_nodes();
	struct bitmask *nodes, *mask;
	int pagesize = getpagesize();
	size_t maplen = (size_t)pagesize * (max + 1);
	int i;
	int pol;
	int node;
	int err = 0;

	nodes = numa_bitmask_alloc(maxmask);
	mask = numa_bitmask_alloc(maxmask);

	for (i = max; i >= 0; --i) {
		/* Portable code passes fd -1 for anonymous mappings. */
		char *mem = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
				 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
		char *adr = mem;

		if (mem == MAP_FAILED)
			err("mmap");

		printf("%d offset %lx\n", i, (long)(adr - mem));

		numa_bitmask_clearall(nodes);
		numa_bitmask_clearall(mask);
		numa_bitmask_setbit(nodes, i);

		if (mbind(adr, pagesize, MPOL_PREFERRED, nodes->maskp,
			  nodes->size, 0) < 0)
			err("mbind");

		/* Touch the page so that it gets allocated. */
		++*adr;

		if (get_mempolicy(&pol, mask->maskp, mask->size, adr, MPOL_F_ADDR) < 0)
			err("get_mempolicy");

		assert(pol == MPOL_PREFERRED);
		assert(numa_bitmask_isbitset(mask, i));

		node = 0x123;

		if (get_mempolicy(&node, NULL, 0, adr, MPOL_F_ADDR|MPOL_F_NODE) < 0)
			err("get_mempolicy2");

		printf("got node %d expected %d\n", node, i);

		if (node != i)
			err = 1;

		/* Release the mapping instead of leaking one per iteration. */
		if (munmap(mem, maplen) < 0)
			err("munmap");
	}
	numa_bitmask_free(nodes);
	numa_bitmask_free(mask);
	return err;
}
07070100000045000081ED00003EA600000032000000016127204300000064000000000000000000000000000000000000002900000000numactl-2.0.14.20.g4ee5e0c/test/printcpu#!/bin/bash
# Print the cpu this script is running on: the field at index 38 of the
# whitespace-split contents of /proc/self/stat.
declare -a stat_fields
stat_fields=( $(< /proc/self/stat) )
echo "${stat_fields[38]}"
07070100000046000081A400003EA600000032000000016127204300000E96000000000000000000000000000000000000002A00000000numactl-2.0.14.20.g4ee5e0c/test/randmap.c/* Randomly change policy */
#include <stdio.h>
#include "numa.h"
#include "numaif.h"
#include <sys/mman.h>
#include <sys/shm.h>
#include <sys/ipc.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#define SIZE (100*1024*1024)
#define PAGES (SIZE/pagesize)
#define perror(x) printf("%s: %s\n", x, strerror(errno))
#define err(x) perror(x),exit(1)
/* Expected policy state of one page of the test mapping, as recorded by setpol(). */
struct page {
/* node mask last applied to the page */
unsigned long mask;
/* policy mode last applied to the page */
int policy;
};
/* One entry per page of the mapping; allocated in testmap(). */
struct page *pages;
/* Start of the mapping the policies are applied to; set in main(). */
char *map;
/* Page size; set from getpagesize() in main(). */
int pagesize;
/*
 * Apply a policy to a page range of the mapping and verify the result.
 *
 * offset: first page of the range (index into the mapping, in pages).
 * length: number of pages in the range.
 * policy: policy mode handed to mbind().
 * nodes:  node mask handed to mbind() (a single unsigned long).
 *
 * On mbind() failure a message is printed and nothing else is done.
 * Otherwise the expected state is recorded in pages[] and the policy of
 * every page from 20 pages before the range to 20 pages after it (clamped
 * to the mapping) is read back with get_mempolicy() and compared with the
 * recorded state; mismatches are printed.  Exits if get_mempolicy() fails.
 */
void setpol(unsigned long offset, unsigned long length, int policy, unsigned long nodes)
{
long i, end;
printf("off:%lx length:%lx policy:%d nodes:%lx\n",
offset, length, policy, nodes);
if (mbind(map + offset*pagesize, length*pagesize, policy,
&nodes, 8, 0) < 0) {
printf("mbind: %s offset %lx length %lx policy %d nodes %lx\n",
strerror(errno),
offset*pagesize, length*pagesize,
policy, nodes);
return;
}
/* Record what every page of the range is now expected to report. */
for (i = offset; i < offset+length; i++) {
pages[i].mask = nodes;
pages[i].policy = policy;
}
/* Also check the neighbourhood of the range, clamped to [0, PAGES). */
i = offset - 20;
if (i < 0)
i = 0;
end = offset+length+20;
if (end > PAGES)
end = PAGES;
for (; i < end; i++) {
int pol2;
unsigned long nodes2;
if (get_mempolicy(&pol2, &nodes2, sizeof(long)*8, map+i*pagesize,
MPOL_F_ADDR) < 0)
err("get_mempolicy");
if (pol2 != pages[i].policy) {
printf("%lx: got policy %d expected %d, nodes got %lx expected %lx\n",
i, pol2, pages[i].policy, nodes2, pages[i].mask);
}
/* The node mask is not compared when the policy just set is MPOL_DEFAULT. */
if (policy != MPOL_DEFAULT && nodes2 != pages[i].mask) {
printf("%lx: nodes %lx, expected %lx, policy %d\n",
i, nodes2, pages[i].mask, policy);
}
}
}
/* Number of set bits in each possible 4-bit value. */
static unsigned char pop4[16] = {
	0, 1, 1, 2,
	1, 2, 2, 3,
	1, 2, 2, 3,
	2, 3, 3, 4
};

/* Return the number of set bits in val, summed one nibble at a time. */
int popcnt(unsigned long val)
{
	int total;

	for (total = 0; val != 0; val >>= 4)
		total += pop4[val & 0xf];
	return total;
}
/*
 * Run the policy tests on the mapping.
 *
 * Allocates the pages[] bookkeeping array (exit status 100 on failure), runs
 * a fixed sequence of setpol() calls, and then loops forever applying
 * randomly chosen policies to randomly chosen ranges.  This function never
 * returns.
 */
void testmap(void)
{
pages = calloc(1, PAGES * sizeof(struct page));
if (!pages)
exit(100);
printf("simple tests\n");
/* Number of pages in one megabyte */
#define MB ((1024*1024)/pagesize)
setpol(0, PAGES, MPOL_INTERLEAVE, 3);
setpol(0, MB, MPOL_BIND, 1);
setpol(MB, MB, MPOL_BIND, 1);
setpol(MB, MB, MPOL_DEFAULT, 0);
setpol(MB, MB, MPOL_PREFERRED, 2);
setpol(MB/2, MB, MPOL_DEFAULT, 0);
setpol(MB+MB/2, MB, MPOL_BIND, 2);
setpol(MB/2+100, 100, MPOL_PREFERRED, 1);
setpol(100, 200, MPOL_PREFERRED, 1);
printf("done\n");
for (;;) {
/* The order of the random() calls determines the sequence for a given seed. */
unsigned long offset = random() % PAGES;
int policy = random() % (MPOL_MAX);
unsigned long nodes = random() % 4;
long length = random() % (PAGES - offset);
/* validate */
/* 'continue' below restarts the for loop with a fresh random choice */
switch (policy) {
case MPOL_DEFAULT:
nodes = 0;
break;
case MPOL_INTERLEAVE:
case MPOL_BIND:
if (nodes == 0)
continue;
break;
case MPOL_PREFERRED:
/* only masks with exactly one node are used for MPOL_PREFERRED */
if (popcnt(nodes) != 1)
continue;
break;
}
setpol(offset, length, policy, nodes);
}
}
/*
 * Entry point of the random policy test.
 *
 * av[1] (optional): run time; a trailing 'h' or 'm' multiplies the number by
 *                   3600 or 60.  The value is handed to alarm().  Without it
 *                   the test runs forever.
 * av[2] (optional): random seed; defaults to time(0).
 */
int main(int ac, char **av)
{
unsigned long seed;
pagesize = getpagesize();
#if 0
map = mmap(NULL, SIZE, PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
if (map == (char*)-1)
err("mmap");
#else
/* Use a private shared memory segment of SIZE bytes as the test mapping. */
int shmid = shmget(IPC_PRIVATE, SIZE, IPC_CREAT|0666);
if (shmid < 0) err("shmget");
map = shmat(shmid, NULL, SHM_RDONLY);
/* IPC_RMID is issued right after attaching, before the shmat result is checked */
shmctl(shmid, IPC_RMID, NULL);
if (map == (char *)-1) err("shmat");
printf("map %p\n", map);
#endif
if (av[1]) {
char *end;
unsigned long timeout = strtoul(av[1], &end, 0);
switch (*end) {
case 'h': timeout *= 3600; break;
case 'm': timeout *= 60; break;
}
printf("running for %lu seconds\n", timeout);
alarm(timeout);
} else
printf("running forever\n");
if (av[1] && av[2])
seed = strtoul(av[2], 0, 0);
else
seed = time(0);
printf("random seed %lu\n", seed);
srandom(seed);
testmap();
/* test shm etc. */
return 0;
}
07070100000047000081A400003EA600000032000000016127204300000992000000000000000000000000000000000000002F00000000numactl-2.0.14.20.g4ee5e0c/test/realloc_test.c#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
#include "numa.h"
#include "numaif.h"
#define DEFAULT_NR_PAGES 1024
/*
 * Convert str to an int using strtol() with base auto-detection.
 *
 * Exits with status 1 when str contains no digits or has trailing
 * characters.  Values outside the int range produce a warning on stderr and
 * are clamped to INT_MAX / INT_MIN.
 */
static int parse_int(const char *str)
{
	char *endptr;
	long ret;

	/* strtol() only sets errno on failure, so clear any stale value first. */
	errno = 0;
	ret = strtol(str, &endptr, 0);
	if (endptr == str || *endptr != '\0') {
		fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr);
		exit(1);
	}

	if (errno == ERANGE)
		fprintf(stderr, "[warning] strtol() out of range\n");

	if (ret > INT_MAX || ret < INT_MIN) {
		fprintf(stderr, "[warning] parse_int() out of range\n");
		ret = (ret > 0) ? INT_MAX : INT_MIN;
	}

	return (int) ret;
}
int main(int argc, char **argv)
{
char *mem;
int page_size = numa_pagesize();
int node = 0;
int nr_pages = DEFAULT_NR_PAGES;
if (numa_available() < 0) {
fprintf(stderr, "numa is not available");
exit(1);
}
if (argc > 1)
node = parse_int(argv[1]);
if (argc > 2)
nr_pages = parse_int(argv[2]);
mem = numa_alloc_onnode(page_size, node);
/* Store the policy of the newly allocated area */
unsigned long nodemask;
int mode;
int nr_nodes = numa_num_possible_nodes();
if (get_mempolicy(&mode, &nodemask, nr_nodes, mem,
MPOL_F_NODE | MPOL_F_ADDR) < 0) {
perror("get_mempolicy() failed");
exit(1);
}
/* Print some info */
printf("Page size: %d\n", page_size);
printf("Pages realloc'ed: %d\n", nr_pages);
printf("Allocate data in node: %d\n", node);
int i;
int nr_inplace = 0;
int nr_moved = 0;
for (i = 0; i < nr_pages; i++) {
/* Enlarge mem with one more page */
char *new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size);
if (!new_mem) {
perror("numa_realloc() failed");
exit(1);
}
if (new_mem == mem)
++nr_inplace;
else
++nr_moved;
mem = new_mem;
/* Check the policy of the realloc'ed area */
unsigned long realloc_nodemask;
int realloc_mode;
if (get_mempolicy(&realloc_mode, &realloc_nodemask,
nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) {
perror("get_mempolicy() failed");
exit(1);
}
assert(realloc_nodemask == nodemask &&
realloc_mode == mode && "policy changed");
}
/* Shrink to the original size */
mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size);
if (!mem) {
perror("numa_realloc() failed");
exit(1);
}
numa_free(mem, page_size);
printf("In-place reallocs: %d\n", nr_inplace);
printf("Moved reallocs: %d\n", nr_moved);
return 0;
}
07070100000048000081ED00003EA6000000320000000161272043000014C2000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/test/regress#!/bin/bash
# Basic regression test for numactl and the NUMA API.
# Run it from the 'test' directory of the numactl source package once the
# build has finished [simply use 'make test'].
# The statistics checks can fail while the system is under memory pressure.
# Copyright 2003,2004 Andi Kleen, SuSE Labs.
testdir=$(dirname "$0")
: "${srcdir:=${testdir}/..}"
: "${builddir:=${srcdir}}"
PATH=${builddir}:$PATH
export PATH
: "${NUMACTL:=${builddir}/numactl}"
VALGRIND=${VALGRIND:-}
# Sizes in bytes
MB=$(( 1024*1024 ))
SIZE=$(( 15 * $MB ))
DEMOSIZE=$(( 10 * $MB ))
STAT_INTERVAL=5
PAGESIZE=$("${builddir}/test/pagesize")
# Page-count thresholds used by the statistics checks
PAGES=$(( $SIZE / $PAGESIZE ))
HALFPAGES=$(( $PAGES / 2 ))
HALFPAGES=$(( $HALFPAGES - 100 ))
DOUBLEPAGES=$(( $PAGES * 2 ))
DOUBLEPAGES=$(( $DOUBLEPAGES - 200 ))
NEEDPAGES=$(( $DOUBLEPAGES + $DOUBLEPAGES / 5 )) # 20% spare
EXIT=0
declare -i maxnode
declare -a node nlist
# =====================================================================
# Run the numactl binary under test ($NUMACTL) with the given arguments,
# prefixed by $VALGRIND when that variable is set.
numactl() {
$VALGRIND $NUMACTL "$@"
}
# Report a failed check and remember it in EXIT; the script keeps running.
failed() {
	printf '%s\n' '=======FAILED'
	printf '%s\n' "Check if machine doesn't have background jobs and try again"
	EXIT=1
}
# nstat statname node
# Print the numastat counter(s) matching <statname> for node <node>.
# Sleeps $STAT_INTERVAL seconds first.
nstat() {
sleep $STAT_INTERVAL
nid=node$2
# Position of the "node<N>" column in the numastat header line
id=`numastat | head -1 | awk -v node=$nid '{ for (i = 1; i <= NF; ++i) if($i==node) print i; exit }'`
declare -a fields
# Data rows start with the counter name at index 0, so index $id selects
# the value below header column $id.
numastat | grep $1 | while read -a fields ; do
echo ${fields[$id]}
done
}
# Fill the 'node' array with the ids of all nodes that have at least
# NEEDPAGES pages of free memory and set numnodes/maxnode accordingly.
# Exits with status 77 (skip) when fewer than two such nodes exist.
probe_hardware()
{
declare -i n=0
numnodes=$(numactl --hardware | awk '/^available/ { print $2 }')
maxnode=$(expr $numnodes - 1)
# Node ids: the last line starting with "node", with its first field blanked
nlist=( $(numactl --hardware | grep "^node" | tail -1 |awk '{$1=""; print }') )
# find nodes with at least NEEDPAGES of free memory
for i in $(seq 0 $maxnode) ; do
# Fourth field of the "node <id> free" line, multiplied by MB
free=$(numactl --hardware | fgrep " ${nlist[$i]} free" | awk '{print $4}')
free=$(( free * MB ))
if [[ $((free / PAGESIZE)) -ge $NEEDPAGES ]]; then
node[$n]=${nlist[$i]}
n=$((n + 1 ))
fi
done
# From here on numnodes/maxnode describe the usable nodes only
numnodes=$n
maxnode=$(expr $numnodes - 1)
if [ $numnodes -lt 2 ] ; then
echo "need at least two nodes with at least $NEEDPAGES each of"
echo "free memory for mempolicy regression tests"
exit 77 # Skip test
fi
}
# =========================================================================
# Run "memhog -H $SIZE" under numactl with the given policy options and
# record a failure if the command does not succeed.
_test_process_state() {
	echo '=>testing numactl' "$@" "memhog -H $SIZE"
	if ! numactl "$@" memhog -H $SIZE; then
		failed
	fi
}
# Exercise the process policy options of numactl (--interleave, --membind,
# --preferred, --localalloc) and check with the numastat counters that the
# memory was allocated on the expected nodes.  Problems are recorded through
# failed(); the function itself does not abort.
test_process_state()
{
	declare -i n0=${node[0]} n1=${node[1]}

	# Interleaving over n0,n1 must add at least HALFPAGES hits on each node
	_test_process_state --interleave=$n1

	a0=`nstat interleave_hit $n0`
	a1=`nstat interleave_hit $n1`
	_test_process_state --interleave=$n0,$n1
	b0=`nstat interleave_hit $n0`
	b1=`nstat interleave_hit $n1`
	if [ $(expr $b1 - $a1) -lt $HALFPAGES ]; then
		echo "interleaving test failed $n1 $b1 $a1"
		failed
	fi
	if [ $(expr $b0 - $a0) -lt $HALFPAGES ]; then
		echo "interleaving test failed $n0 $b0 $a0"
		failed
	fi

	_test_process_state --interleave=all

	_test_process_state --membind=all

	# Binding to n0,n1 must add at least PAGES hits on the two nodes together
	a=$(expr $(nstat numa_hit $n0) + $(nstat numa_hit $n1))
	_test_process_state --membind=$n0,$n1
	b=$(expr $(nstat numa_hit $n0) + $(nstat numa_hit $n1))
	if [ $(expr $b - $a) -lt $PAGES ]; then
		echo "membind test failed $n1 $b $a ($PAGES)"
		failed
	fi

	# membind plus preferred on one node must add at least DOUBLEPAGES hits
	for i in "${node[@]}" ; do
		a=`nstat numa_hit $i`
		_test_process_state --membind=$i
		_test_process_state --preferred=$i
		b=`nstat numa_hit $i`
		if [ $(expr $b - $a) -lt $DOUBLEPAGES ]; then
			# report the node under test ($i)
			echo "membind/preferred on node $i failed $b $a"
			failed
		fi
	done
	_test_process_state --localalloc
}
# =========================================================================
# test mbind
# Run "memhog -H $SIZE" with the given policy arguments and record a failure
# if the command does not succeed.
_test_mbind() {
	echo '=>testing memhog -H' "$@"
	if ! memhog -H $SIZE "$@"; then
		failed
	fi
}
# Exercise the policy arguments of memhog (interleave, membind, preferred)
# and check with the numastat counters that the memory was allocated on the
# expected nodes.  Problems are recorded through failed(); the function
# itself does not abort.
test_mbind()
{
	declare -i n0=${node[0]} n1=${node[1]}

	# Interleaving over n0,n1 must add at least HALFPAGES hits on each node
	a0=`nstat interleave_hit $n0`
	a1=`nstat interleave_hit $n1`
	_test_mbind interleave $n0,$n1
	b0=`nstat interleave_hit $n0`
	b1=`nstat interleave_hit $n1`
	if [ $(expr $b1 - $a1) -lt $HALFPAGES ]; then
		echo "interleaving test 2 failed $n1 $b1 $a1 expected $HALFPAGES"
		failed
	fi
	if [ $(expr $b0 - $a0) -lt $HALFPAGES ]; then
		echo "interleaving test 2 failed $n0 $b0 $a0"
		failed
	fi

	_test_mbind interleave all

	# Binding to n0,n1 must add at least PAGES hits on the two nodes together
	a=$(expr $(nstat numa_hit $n0) + $(nstat numa_hit $n1))
	_test_mbind membind $n0,$n1
	b=$(expr $(nstat numa_hit $n0) + $(nstat numa_hit $n1))
	if [ $(expr $b - $a) -lt $PAGES ]; then
		echo "membind test 2 failed $b $a ($PAGES)"
		failed
	fi

	# membind plus preferred on one node must add at least DOUBLEPAGES hits
	for i in "${node[@]}" ; do
		a=`nstat numa_hit $i`
		_test_mbind membind $i
		_test_mbind preferred $i
		b=`nstat numa_hit $i`
		if [ $(expr $b - $a) -lt $DOUBLEPAGES ]; then
			# $i already is the node id; it must not be used as an
			# index into the node array again.
			echo "membind/preferred test 2 on node $i failed $b $a"
			failed
		fi
	done
}
# =========================================================================
# Drive the regression run: probe the hardware, run both test groups and
# print the overall verdict.  Exits with status 1 if any check failed.
main()
{
	# Use twice the vm statistics refresh interval, when it is available
	if [ -e /proc/sys/vm/stat_interval ]; then
		STAT_INTERVAL=$(cat /proc/sys/vm/stat_interval)
		STAT_INTERVAL=$(expr $STAT_INTERVAL \* 2)
	fi

	probe_hardware

	numactl --cpubind=${node[0]} /bin/true
	numactl --cpubind=${node[1]} /bin/true

	numactl -s
	numactl --hardware

	# Snapshot the statistics before and after the tests and show the delta
	numastat > A

	test_process_state

	test_mbind

	numastat > B
	diff -u A B
	rm A B

	if [ "$EXIT" != 0 ] ; then
		echo '========FAILURE'
		exit 1
	fi
	echo '========SUCCESS'
}
# =========================================================================
main
07070100000049000081A400003EA600000032000000016127204300000394000000000000000000000000000000000000002B00000000numactl-2.0.14.20.g4ee5e0c/test/regress-io#!/bin/bash
# Test the parsing of IO affinity node strings.
# The results depend on the machine setup, so individual tests may fail.
testdir=$(dirname "$0")
: "${srcdir:=${testdir}/..}"
: "${builddir:=${srcdir}}"
PATH=${builddir}:$PATH
export PATH
# Overall result: 0 = success, 1 = at least one check failed
E=0
# Run the given command and expect it to succeed; on failure print "failed"
# and set the global E to 1.
check() {
	echo testing $@
	"$@" && return 0
	echo failed
	E=1
}
# Run the given command and expect it to fail; if it unexpectedly succeeds
# print "failed" and set the global E to 1.
fail() {
	echo testing failure of $@
	"$@" || return 0
	echo failed
	E=1
}
# Each line feeds one affinity specification to test/node-parse and states
# whether parsing is expected to succeed (check) or to be rejected (fail).
check "${builddir}/test/node-parse" file:.
check "${builddir}/test/node-parse" ip:8.8.8.8
fail "${builddir}/test/node-parse" ip:127.0.0.1
# First interface whose `ip link` line mentions "eth".  `cut -d: -f2` keeps
# the blank that follows the "N:" index, so strip it; otherwise the argument
# becomes "netdev: eth0" and names a device that does not exist.
IF=$(ip link ls | grep eth | cut -d: -f2 | head -1 | tr -d ' ')
check "${builddir}/test/node-parse" "netdev:$IF"
fail "${builddir}/test/node-parse" netdev:lo
# Device backing the root file system, as reported by df.
DEV=$(df | awk '/\/$/ { print $1 }')
check "${builddir}/test/node-parse" file:$DEV
check "${builddir}/test/node-parse" block:$(basename $DEV)
check "${builddir}/test/node-parse" pci:0:0.0
if [ "$E" = 0 ] ; then echo SUCCESS ; else echo FAILURE ; fi
exit $E
0707010000004A000081ED00003EA600000032000000016127204300000232000000000000000000000000000000000000002900000000numactl-2.0.14.20.g4ee5e0c/test/regress2#!/bin/sh
# More regression tests for libnuma/numa api
# Optional command prefix (e.g. a valgrind invocation) taken from the
# environment; empty by default.
VALGRIND=${VALGRIND:-}
testdir=`dirname "$0"`
# srcdir defaults to the parent of the test directory, builddir to srcdir;
# both can be overridden from the environment.
: ${srcdir:=${testdir}/..}
: ${builddir:=${srcdir}}
export PATH=${builddir}:$PATH
# Echo the command line, run it (optionally under $VALGRIND) and abort the
# whole script with status 1 as soon as one test fails.
T() {
	echo "$@"
	$VALGRIND "$@" || {
		echo $1 FAILED!!!!
		exit 1
	}
	echo
}
# Tests run in this order; T exits the script on the first failure.
# Compiled tests are taken from builddir, script tests from srcdir.
# still broken
#T "${builddir}/test/prefered"
T "${builddir}/test/distance"
T "${builddir}/test/nodemap"
T "${srcdir}/test/checkaffinity"
T "${srcdir}/test/checktopology"
T "${builddir}/test/tbitmap"
T "${srcdir}/test/bind_range"
#T "${builddir}/test/randmap"
0707010000004B000081ED00003EA6000000320000000161272043000002D9000000000000000000000000000000000000002700000000numactl-2.0.14.20.g4ee5e0c/test/runltp#!/bin/sh
# Run the Linux Test Project with various numactl settings. Will run for a few hours.
# Must run as root.
# You can download LTP from http://ltp.sourceforge.net
# Change LTP below to the source directory of a compiled LTP distribution
LTP=/src/ltp
LEN=2h
LTPOPT="-q -p -t $LEN"
# The numactl binary is expected one directory above the current directory.
export PATH=`pwd`/..:$PATH
# Stop here if the LTP tree is missing rather than invoking ./runltp from
# whatever directory we happen to be in.
cd "$LTP" || { echo "runltp: cannot change to $LTP" >&2; exit 1; }
for i in 1 2 3 ; do
	numactl --interleave=all ./runltp $LTPOPT -l n.interleave.all.$i
	numactl --interleave=0,1 ./runltp $LTPOPT -l n.interleave.01.$i
	numactl --preferred=0 --cpubind=1 ./runltp $LTPOPT -l n.preferred.$i
	# the VM test that allocates all memory may fail
	numactl --membind=1 --cpubind=0 ./runltp $LTPOPT -l n.membind1.$i
	numactl --membind=0,1 ./runltp $LTPOPT -l n.membind01.$i
done
0707010000004C000081ED00003EA60000003200000001612720430000060D000000000000000000000000000000000000002800000000numactl-2.0.14.20.g4ee5e0c/test/shmtest#!/bin/sh
# basic shared memory policy test
# hugetlbfs and tmpfs must be mounted on these mount points
TMPFS=/dev/shm
HUGE=/huge
#valgrind 3.0.1 doesn't implement mbind() yet on x86-64
#VALGRIND="valgrind --tool=memcheck"
# Optional command prefix for the numactl wrapper below; empty means run directly.
VALGRIND=
# Abort on the first failing command; the EXIT trap then reports the failure.
set -e
export PATH=`pwd`/..:$PATH
# Wrapper that shadows the numactl command: always runs ../numactl,
# optionally prefixed by $VALGRIND, with the caller's arguments.
numactl() {
$VALGRIND ../numactl "$@"
}
# EXIT-trap handler used while tests are running: snapshot numastat, show
# what changed since the last checkpoint and exit with status 1.
failure() {
numastat > after
# diff exits non-zero when the files differ; do not let set -e cut us short.
set +e
diff -u before after
echo
echo TEST FAILED
exit 1
}
# EXIT-trap handler installed once every test has passed.
success() {
echo test succeeded
}
# Save the current numastat output in "before" so failure() can diff against it.
checkpoint() {
numastat > before
}
# Until the end of the script any exit (including one forced by set -e)
# is reported as a failure.
trap failure EXIT
# Run the policy sequence against one shared memory object.
# $1 holds the numactl option(s) selecting the object; it is expanded
# unquoted on purpose so that a value such as "--huge --shm=B" splits into
# separate options.
# Each step takes a numastat checkpoint first, then either sets a policy on
# a 2m window (interleave at 2m, membind at 4m, preferred at 6m) with
# --strict --verify, or dumps the object again.
basictest() {
echo initial
checkpoint
numactl --length=20m $1 --dump
echo interleave
checkpoint
numactl --offset=2m --length=2m $1 --strict --interleave=0,1 --verify --dump
echo interleave verify
checkpoint
numactl $1 --dump
echo membind setup
checkpoint
numactl --offset 4m --length=2m $1 --strict --membind=1 --verify --dump
echo membind verify
checkpoint
numactl $1 --dump
echo preferred setup
checkpoint
numactl --offset 6m --length 2m $1 --strict --preferred=1 --verify --dump
echo preferred verify
checkpoint
numactl $1 --dump
# check overlaps here
}
# Remove the shared memory segment keyed by file $1 (if that file exists)
# and then the key file itself. A failing ipcrm is tolerated.
cleanupshm() {
	[ -f $1 ] || return 0
	ipcrm -M $(./ftok $1) || true
	rm $1
}
# Print a section heading for test group $1, surrounded by blank lines.
banner() {
echo
echo ++++++++++++ $1 +++++++++++++++
echo
}
# System V shared memory keyed by file A.
banner shm
cleanupshm A
basictest --shm=A
cleanupshm A
# Same, backed by huge pages, keyed by file B.
banner hugeshm
cleanupshm B
basictest "--huge --shm=B"
cleanupshm B
# A regular file on tmpfs.
banner tmpfs
basictest "--file $TMPFS/B"
rm $TMPFS/B
# first need a way to create holey hugetlbfs files.
#banner hugetlbfs
#basictest "--file $HUGE/B"
#rm /hugetlbfs/B
# All tests passed: drop the last checkpoint and report success on exit.
rm before
trap success EXIT
0707010000004D000081A400003EA600000032000000016127204300000B9A000000000000000000000000000000000000002A00000000numactl-2.0.14.20.g4ee5e0c/test/tbitmap.c/* Unit test bitmap parser */
#define _GNU_SOURCE 1
//#include <asm/bitops.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <ctype.h>
#include "numa.h"
#include "util.h"
/* For util.c. Fixme. */
/* util.c calls usage() but expects the main module to define it (see util.h);
 * this test has no usage text, so the stub simply exits with status 1. */
void usage(void)
{
exit(1);
}
/* Round x up to a multiple of a; the mask arithmetic requires a to be a power of two. */
#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
/* Test / set / clear bit i in an array p of unsigned long words. */
#define test_bit(i,p) ((p)[(i) / BITS_PER_LONG] & (1UL << ((i)%BITS_PER_LONG)))
#define set_bit(i,p) ((p)[(i) / BITS_PER_LONG] |= (1UL << ((i)%BITS_PER_LONG)))
#define clear_bit(i,p) ((p)[(i) / BITS_PER_LONG] &= ~(1UL << ((i)%BITS_PER_LONG)))
typedef unsigned u32;
/* Number of bits in a long on the build target. */
#define BITS_PER_LONG (sizeof(long)*8)
/* Same rounding as ALIGN(); y must be a power of two. */
#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
/* Bytes, respectively longs, needed to hold x bits rounded up to whole longs. */
#define CPU_BYTES(x) (round_up(x, BITS_PER_LONG)/8)
#define CPU_LONGS(x) (CPU_BYTES(x) / sizeof(long))
/* Following routine extracted from Linux 2.6.16 */
/* Bits per comma-separated group in the textual bitmap format. */
#define CHUNKSZ 32
/* NOTE(review): fls() is not defined in this file; this macro is unused here. */
#define nbits_to_hold_value(val) fls(val)
/* Value of one hex digit character; c is evaluated more than once. */
#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
#define BASEDEC 10 /* fancier cpuset lists input in decimal */
/**
 * bitmap_scnprintf - convert bitmap to an ASCII hex string.
 * @buf: byte buffer into which string is placed
 * @buflen: reserved size of @buf, in bytes
 * @mask: pointer to struct bitmask to convert
 *
 * Hex digits are grouped into comma-separated sets of eight digits per set.
 * The most significant group is printed first and is shorter when the mask
 * size is not a multiple of CHUNKSZ.
 *
 * Returns the number of characters actually stored in @buf, not counting
 * the terminating NUL.  The output is truncated when @buf is too small and
 * the return value never exceeds @buflen - 1 (0 when @buflen is 0).
 */
int bitmap_scnprintf(char *buf, unsigned int buflen, struct bitmask *mask)
{
	int i, word, bit, n, len = 0;
	unsigned long val;
	const char *sep = "";
	int chunksz;
	u32 chunkmask;

	if (buflen == 0)
		return 0;
	/* Leave a valid empty string behind even when nothing is printed. */
	buf[0] = '\0';

	/* Width in bits of the first (most significant, possibly partial) group. */
	chunksz = mask->size & (CHUNKSZ - 1);
	if (chunksz == 0)
		chunksz = CHUNKSZ;

	i = ALIGN(mask->size, CHUNKSZ) - CHUNKSZ;
	for (; i >= 0; i -= CHUNKSZ) {
		chunkmask = ((1ULL << chunksz) - 1);
		word = i / BITS_PER_LONG;
		bit = i % BITS_PER_LONG;
		val = (mask->maskp[word] >> bit) & chunkmask;
		n = snprintf(buf + len, buflen - len, "%s%0*lx", sep,
			     (chunksz + 3) / 4, val);
		if (n < 0)
			break;
		/*
		 * snprintf() reports the length it wanted to write.  Adding
		 * that blindly would push len past buflen, make buflen-len
		 * wrap around and let the next call write out of bounds.
		 */
		if ((unsigned int)n >= buflen - len) {
			len = buflen - 1;
			break;
		}
		len += n;
		chunksz = CHUNKSZ;
		sep = ",";
	}
	return len;
}
extern int numa_parse_bitmap(char *buf, struct bitmask *mask);
#define MASKSIZE 300
/*
 * Round-trip test: for every bit position below MASKSIZE, print a mask with
 * only that bit set using bitmap_scnprintf(), parse the text back with
 * numa_parse_bitmap() and require both masks to be byte-identical.
 */
int main(void)
{
	char text[1024];
	struct bitmask *expect = numa_bitmask_alloc(MASKSIZE);
	struct bitmask *parsed = numa_bitmask_alloc(MASKSIZE);
	int bit;

	printf("Testing bitmap functions\n");
	for (bit = 0; bit < MASKSIZE; bit++) {
		numa_bitmask_clearall(expect);
		numa_bitmask_clearall(parsed);
		numa_bitmask_setbit(expect, bit);
		assert(find_first(expect) == bit);

		/* The text handed to the parser ends in a newline. */
		bitmap_scnprintf(text, sizeof(text), expect);
		strcat(text, "\n");
		if (numa_parse_bitmap(text, parsed) < 0)
			assert(0);

		if (memcmp(expect->maskp, parsed->maskp,
			   numa_bitmask_nbytes(expect)) == 0)
			continue;

		bitmap_scnprintf(text, sizeof(text), parsed);
		printf("mask2 differs: %s\n", text);
		assert(0);
	}
	printf("Passed\n");
	return 0;
}
0707010000004E000081A400003EA6000000320000000161272043000003A7000000000000000000000000000000000000002A00000000numactl-2.0.14.20.g4ee5e0c/test/tshared.c#include <numa.h>
#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
#define err(x) perror(x),exit(1)
/* Test dimensions. */
enum SZ {
/* Bytes of interleaved memory allocated by main(). */
MEMSZ = 100<<20,
/* Number of child processes forked to touch the memory. */
NTHR = 10,
};
/* test if shared interleaving state works. */
/*
 * Allocates MEMSZ bytes with numa_alloc_interleaved(), lets NTHR forked
 * children touch disjoint sets of pages, then asks get_mempolicy() for the
 * node of every page and prints each page that is not on the node expected
 * from a round-robin walk over nodes 0..max_node.
 * Exits with status 1 when NUMA is unavailable or a system call fails.
 */
int main(void)
{
	int i, k;
	char *mem;
	int pagesz = getpagesize();
	int max_node;

	if (numa_available() < 0) {
		printf("no NUMA API available\n");
		exit(1);
	}
	max_node = numa_max_node();
	mem = numa_alloc_interleaved(MEMSZ);
	/* Without this check the children would fault on a NULL pointer. */
	if (!mem)
		err("numa_alloc_interleaved");

	for (i = 0; i < NTHR; i++) {
		pid_t pid = fork();

		/* A failed fork would otherwise leave pages untouched unnoticed. */
		if (pid < 0)
			err("fork");
		if (pid == 0) {
			/* Child i touches pages i, i+NTHR, i+2*NTHR, ... */
			for (k = i*pagesz; k < MEMSZ; k += pagesz * NTHR) {
				mem[k] = 1;
			}
			_exit(0);
		}
	}
	for (i = 0; i < NTHR; i++)
		wait(NULL);

	k = 0;
	for (i = 0; i < MEMSZ; i += pagesz) {
		int nd;

		if (get_mempolicy(&nd, NULL, 0, mem + i, MPOL_F_NODE|MPOL_F_ADDR) < 0)
			err("get_mempolicy");
		if (nd != k)
			printf("offset %d node %d expected %d\n", i, nd, k);
		k = (k+1)%(max_node+1);
	}
	numa_free(mem, MEMSZ);
	return 0;
}
0707010000004F000081A400003EA60000003200000001612720430000082D000000000000000000000000000000000000002700000000numactl-2.0.14.20.g4ee5e0c/test/tshm.c#include <sys/shm.h>
#include <sys/ipc.h>
#include <sys/fcntl.h>
#include <stdio.h>
#include <numaif.h>
#define err(x) perror(x),exit(1)
enum {
/* Size in bytes of the shared memory segment created by main(). */
MEMSZ = 10*1024*1024,
};
/* Message exchanged between main() and worker() over file descriptors. */
struct req {
/* Request type. */
enum cmd {
SET = 1,
CHECK,
REPLY,
EXIT,
} cmd;
/* Byte offset into the mapping the request refers to. */
long offset;
/* Length in bytes of the range (passed to mbind() for SET). */
long len;
/* Memory policy to set, or the policy reported back in a REPLY. */
int policy;
/* Node mask that goes with the policy. */
nodemask_t nodes;
};
void worker(void)
{
struct req req;
while (read(0, &req, sizeof(struct req) > 0)) {
switch (req.cmd) {
case SET:
if (mbind(map + req.offset, req.len, req.policy, &req.nodes,
NUMA_MAX_NODES+1, 0) < 0)
err("mbind");
break;
case TEST:
req.cmd = REPLY;
if (get_mempolicy(&req.policy, &req.nodes, NUMA_MAX_NODES+1,
map + req.offset, MPOL_F_ADDR) < 0)
err("get_mempolicy");
write(1, &req, sizeof(struct req));
break;
case EXIT:
return;
default:
abort();
}
}
}
/*
 * Build a request from the arguments and write it to fd in one piece.
 * Exits via err() when the request cannot be written completely.
 */
void sendreq(int fd, enum cmd cmd, int policy, long offset, long len, nodemask_t nodes)
{
	struct req req = {
		.cmd = cmd,
		.offset = offset,
		.len = len,
		.policy = policy,
		.nodes = nodes
	};

	/* panic() is not declared anywhere in this file; use the err() macro
	 * defined at the top of the file like the other error paths do. */
	if (write(fd, &req, sizeof(struct req)) != sizeof(struct req))
		err("bad req write");
}
/*
 * Read one REPLY from fd and hand its policy and node mask back through
 * *policy and *nodes.  Exits via err() on a short or failed read and aborts
 * when the message is not a REPLY.  offset and len are currently unused.
 */
void readreq(int fd, int *policy, nodemask_t *nodes, long offset, long len)
{
	struct req req;

	/* panic() is not declared anywhere in this file; use the err() macro
	 * defined at the top of the file like the other error paths do. */
	if (read(fd, &req, sizeof(struct req)) != sizeof(struct req))
		err("bad req read");
	if (req.cmd != REPLY)
		abort();
	*policy = req.policy;
	*nodes = req.nodes;
}
/*
 * Create a System V shared memory segment keyed by the file "tshm",
 * interleave it over the nodes in nmask, and fork a worker that is meant to
 * set and check policies on request.
 * NOTE(review): the request loop is still an unimplemented stub, so the
 * program spins forever and the cleanup after it is never reached.
 */
int main(void)
{
	/* The key file only has to exist for ftok(); it is closed right away.
	 * It used to be called fd, which clashed with the pipe array below. */
	int keyfd = open("tshm", O_CREAT, 0600);
	if (keyfd < 0) err("open tshm");
	close(keyfd);
	key_t key = ftok("tshm", 1);
	if (key == (key_t)-1) err("ftok");
	int shm = shmget(key, MEMSZ, IPC_CREAT|0600);
	if (shm < 0) err("shmget");
	char *map = shmat(shm, NULL, 0);
	/* shmat() reports failure as (void *)-1, not NULL. */
	if (map == (char *)-1) err("shmat");
	printf("map = %p\n", map);

	unsigned long nmask = 0x3;
	if (mbind(map, MEMSZ, MPOL_INTERLEAVE, &nmask, 4, 0) < 0) err("mbind1");

	int fd[2];
	if (pipe(fd) < 0) err("pipe");
	if (fork() == 0) {
		/* Child: the pipe ends become stdin/stdout of worker(). */
		close(0);
		close(1);
		dup2(fd[0], 0);
		dup2(fd[1], 1);
		worker();
		_exit(0);
	}

	int pagesz = getpagesize();
	int i;

	srand(1);
	for (;;) {
		/* chose random offset */
		/* either in child or here */
		/* change policy */
		/* ask other guy to check */
	}
	shmdt(map);
	shmctl(shm, IPC_RMID, 0);
}
07070100000050000081A400003EA600000032000000016127204300000AFD000000000000000000000000000000000000002200000000numactl-2.0.14.20.g4ee5e0c/util.c/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
numactl is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; version
2.
numactl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should find a copy of v2 of the GNU General Public License somewhere
on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "numa.h"
#include "numaif.h"
#include "util.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
#include <unistd.h>
/* Print "<name>: " followed by the index of every set bit in mask
 * (each followed by a blank) and a newline, to stdout. */
void printmask(char *name, struct bitmask *mask)
{
	int bit = 0;

	printf("%s: ", name);
	while (bit < mask->size) {
		if (numa_bitmask_isbitset(mask, bit))
			printf("%d ", bit);
		bit++;
	}
	putchar('\n');
}
/* Return the index of the lowest set bit in mask, or -1 if no bit is set. */
int find_first(struct bitmask *mask)
{
	int pos = 0;

	while (pos < mask->size && !numa_bitmask_isbitset(mask, pos))
		pos++;
	return pos < mask->size ? pos : -1;
}
/*
 * Print "numactl: " plus the printf-style message and a newline to stderr,
 * then exit with status 1.  Does not return.
 */
void complain(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	fprintf(stderr, "numactl: ");
	vfprintf(stderr, fmt, ap);
	/* The newline belongs to the message, so it goes to stderr as well
	 * (as in nerror()); putchar() sent it to stdout. */
	fputc('\n', stderr);
	va_end(ap);
	exit(1);
}
/*
 * Like complain(), but appends ": <strerror(errno)>" when errno was set on
 * entry.  Writes to stderr and exits with status 1.  Does not return.
 */
void nerror(char *fmt, ...)
{
	/* Capture errno first; the stdio calls below may change it. */
	const int saved = errno;
	va_list args;

	va_start(args, fmt);
	fprintf(stderr, "numactl: ");
	vfprintf(stderr, fmt, args);
	va_end(args);

	if (saved == 0)
		fputc('\n', stderr);
	else
		fprintf(stderr, ": %s\n", strerror(saved));
	exit(1);
}
/*
 * Parse a size such as "4096", "0x1000", "64k", "2M" or "1g".
 * The number is read with strtoul() base 0 (decimal, octal or hex); a
 * directly following K, M or G (either case) multiplies it by 1024, 1024^2
 * or 1024^3.  Any other trailing character is ignored.
 * Returns the size in bytes; no overflow checking is done.
 */
long memsize(char *s)
{
	char *end;
	long length = strtoul(s,&end,0);

	/* <ctype.h> functions require a value representable as unsigned char;
	 * a plain char holding a byte >= 0x80 may be negative. */
	switch (toupper((unsigned char)*end)) {
	case 'G': length *= 1024;  /*FALL THROUGH*/
	case 'M': length *= 1024;  /*FALL THROUGH*/
	case 'K': length *= 1024; break;
	}
	return length;
}
/* Table of policy names accepted by parse_policy() and listed by
 * print_policies(); terminated by an entry whose name is NULL. */
static struct policy {
/* Name as given by the user. */
char *name;
/* Corresponding MPOL_* constant. */
int policy;
/* Non-zero when the policy may be given without an argument. */
int noarg;
} policies[] = {
{ "interleave", MPOL_INTERLEAVE, },
{ "membind", MPOL_BIND, },
{ "preferred", MPOL_PREFERRED, },
{ "default", MPOL_DEFAULT, 1 },
{ NULL },
};
/* Printable names, indexed by policy number. */
static char *policy_names[] = { "default", "preferred", "bind", "interleave" };

/*
 * Return the printable name for a policy number.  Numbers outside the table
 * are rendered as "[N]" in a static buffer, so that result is overwritten by
 * the next such call.
 */
char *policy_name(int policy)
{
	static char buf[32];

	if (policy < array_len(policy_names))
		return policy_names[policy];
	sprintf(buf, "[%d]", policy);
	return buf;
}
/*
 * Translate a policy name into its MPOL_* constant.
 * @name: policy name, or NULL for the default policy
 * @arg:  the policy's argument, or NULL if none was given
 *
 * Calls usage() when the name is unknown, or when an argument is required
 * (the table entry has noarg == 0) but @arg is NULL.
 * Returns the policy constant; MPOL_DEFAULT for a NULL name.
 */
int parse_policy(char *name, char *arg)
{
	int k;
	struct policy *p = NULL;

	if (!name)
		return MPOL_DEFAULT;
	/*
	 * Only remember an entry that actually matches.  Assigning p on every
	 * iteration left it on the last real entry ("default") after a failed
	 * search, so unknown names were silently accepted as MPOL_DEFAULT.
	 */
	for (k = 0; policies[k].name; k++) {
		if (!strcmp(policies[k].name, name)) {
			p = &policies[k];
			break;
		}
	}
	if (!p || (!arg && !p->noarg))
		usage();
	/* usage() is provided by the main module and normally exits; stay
	 * safe if an implementation returns. */
	return p ? p->policy : MPOL_DEFAULT;
}
void print_policies(void)
{
int i;
printf("Policies:");
for (i = 0; policies[i].name; i++)
printf(" %s", policies[i].name);
printf("\n");
}
07070100000051000081A400003EA600000032000000016127204300000332000000000000000000000000000000000000002200000000numactl-2.0.14.20.g4ee5e0c/util.hextern void printmask(char *name, struct bitmask *mask);
extern int find_first(struct bitmask *mask);
extern struct bitmask *nodemask(char *s);
extern struct bitmask *cpumask(char *s, int *ncpus);
extern int read_sysctl(char *name);
extern void complain(char *fmt, ...);
extern void nerror(char *fmt, ...);
/* defined in main module, but called by util.c */
extern void usage(void);
extern long memsize(char *s);
extern int parse_policy(char *name, char *arg);
extern void print_policies(void);
extern char *policy_name(int policy);
#define err(x) perror("numactl: " x),exit(1)
#define array_len(x) (sizeof(x)/sizeof(*(x)))
#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
#if HAVE_ATTRIBUTE_SYMVER
#define SYMVER(a,b) __attribute__ ((symver (b)))
#else
#define SYMVER(a,b) __asm__ (".symver " a "," b);
#endif
07070100000052000081A400003EA600000032000000016127204300000F23000000000000000000000000000000000000002D00000000numactl-2.0.14.20.g4ee5e0c/versions.ldscript# Symbols defined in the library which aren't specifically bound to a
# version node are effectively bound to an unspecified base version of
# the library. It is possible to bind all otherwise unspecified symbols
# to a given version node using `global: *' somewhere in the version script.
#
# The interfaces at the "v1" level.
# At this level we present these functions to the linker (and thus to an
# application).
# Any functions not defined in the global list (i.e. "local") will be internal
# to the library (i.e. not exported but used within the library).
# Thus the real function names, "numa_bind_v1" etc, are local and won't
# be known to the linker.
# the first 16 have v1 aliases
# 3 of the 5 system calls that libnuma provides are common to all versions:
libnuma_1.1 {
global:
set_mempolicy;
get_mempolicy;
mbind;
numa_all_nodes;
numa_alloc;
numa_alloc_interleaved;
numa_alloc_interleaved_subset;
numa_alloc_local;
numa_alloc_onnode;
numa_available;
numa_bind;
numa_distance;
numa_error;
numa_exit_on_error;
numa_free;
numa_get_interleave_mask;
numa_get_interleave_node;
numa_get_membind;
numa_get_run_node_mask;
numa_interleave_memory;
numa_max_node;
numa_migrate_pages;
numa_no_nodes;
numa_node_size64;
numa_node_size;
numa_node_to_cpus;
numa_pagesize;
numa_parse_bitmap;
numa_police_memory;
numa_preferred;
numa_run_on_node;
numa_run_on_node_mask;
numa_sched_getaffinity;
numa_sched_setaffinity;
numa_set_bind_policy;
numa_set_interleave_mask;
numa_set_localalloc;
numa_set_membind;
numa_set_preferred;
numa_set_strict;
numa_setlocal_memory;
numa_tonode_memory;
numa_tonodemask_memory;
numa_warn;
numa_exit_on_warn;
numa_node_to_cpu_update;
local:
*;
};
# The interfaces at the "v2" level.
# The first 17 have v2 aliases
# We add the bitmask_ functions
# and the move_pages and migrate_pages system calls
# 1.2 depends on 1.1
libnuma_1.2 {
global:
copy_bitmask_to_nodemask;
copy_nodemask_to_bitmask;
copy_bitmask_to_bitmask;
move_pages;
migrate_pages;
numa_all_cpus_ptr;
numa_all_nodes_ptr;
numa_alloc_interleaved_subset;
numa_realloc;
numa_allocate_cpumask;
numa_allocate_nodemask;
numa_bind;
numa_bitmask_alloc;
numa_bitmask_clearall;
numa_bitmask_clearbit;
numa_bitmask_equal;
numa_bitmask_free;
numa_bitmask_isbitset;
numa_bitmask_nbytes;
numa_bitmask_setall;
numa_bitmask_setbit;
numa_bitmask_weight;
numa_get_interleave_mask;
numa_get_membind;
numa_get_mems_allowed;
numa_get_run_node_mask;
numa_interleave_memory;
numa_max_possible_node;
numa_move_pages;
numa_no_nodes_ptr;
numa_node_to_cpus;
numa_node_of_cpu;
numa_nodes_ptr;
numa_num_configured_cpus;
numa_num_configured_nodes;
numa_num_possible_nodes;
numa_num_task_cpus;
numa_num_task_nodes;
numa_num_thread_cpus;
numa_num_thread_nodes;
numa_parse_bitmap;
numa_parse_cpustring;
numa_parse_nodestring;
numa_run_on_node_mask;
numa_sched_getaffinity;
numa_sched_setaffinity;
numa_set_interleave_mask;
numa_set_membind;
numa_tonodemask_memory;
local:
*;
} libnuma_1.1;
# A new parsing interface for CPU and node strings
# was added in version 1.3
libnuma_1.3 {
global:
numa_parse_cpustring_all;
numa_parse_nodestring_all;
numa_num_possible_cpus;
local:
*;
} libnuma_1.2;
# A new interface with customizable cpuset awareness
# was added in version 1.4
libnuma_1.4 {
global:
numa_run_on_node_mask_all;
local:
*;
} libnuma_1.3;
# New interface for membind with NUMA balancing optimization
libnuma_1.5 {
global:
numa_set_membind_balancing;
local:
*;
} libnuma_1.4;
07070100000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000B00000000TRAILER!!!790 blocks