File cpuminer-2.5.1.obscpio of Package cpuminer
07070100000000000081A4000003E800000064000000015EF4BCA10000004B000000000000000000000000000000000000001700000000cpuminer-2.5.1/AUTHORSJeff Garzik <jgarzik@pobox.com>
ArtForz
pooler <pooler@litecoinpool.org>
07070100000001000081A4000003E800000064000000015EF4BCA100004648000000000000000000000000000000000000001700000000cpuminer-2.5.1/COPYING GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.
07070100000002000081A4000003E800000064000000015EF4BCA100000033000000000000000000000000000000000000001900000000cpuminer-2.5.1/ChangeLogSee git repository ('git log') for full changelog.
07070100000003000081A4000003E800000064000000015EF4BCA1000002A1000000000000000000000000000000000000001A00000000cpuminer-2.5.1/Dockerfile#
# Dockerfile for cpuminer
# usage: docker run creack/cpuminer --url xxxx --user xxxx --pass xxxx
# ex: docker run creack/cpuminer --url stratum+tcp://ltc.pool.com:80 --user creack.worker1 --pass abcdef
#
#
# Base image that supplies apt and the userland used for the build.
FROM ubuntu:16.04
# MAINTAINER is a deprecated instruction; carry the same contact as a label.
LABEL maintainer="Guillaume J. Charmes <guillaume@charmes.net>"
# Build dependencies: autotools, libcurl development files, git and a C toolchain.
RUN apt-get update -qq && \
    apt-get install -qqy automake libcurl4-openssl-dev git make gcc
# Fetch the upstream sources into /cpuminer.
RUN git clone https://github.com/pooler/cpuminer
# Generate the build system, configure with -O3 and compile minerd.
RUN cd cpuminer && \
    ./autogen.sh && \
    ./configure CFLAGS="-O3" && \
    make
# Run from the source tree so the relative entrypoint below resolves.
WORKDIR /cpuminer
# Arguments given to `docker run <image> ...` are passed straight to minerd.
ENTRYPOINT ["./minerd"]
07070100000004000081A4000003E800000064000000015EF4BCA100000065000000000000000000000000000000000000001700000000cpuminer-2.5.1/LICENSEcpuminer is available under the terms of the GNU Public License version 2.
See COPYING for details.
07070100000005000081A4000003E800000064000000015EF4BCA1000002FF000000000000000000000000000000000000001B00000000cpuminer-2.5.1/Makefile.am
# Add the in-tree jansson headers to the include path only when the
# WANT_JANSSON conditional is true.
if WANT_JANSSON
JANSSON_INCLUDES= -I$(top_srcdir)/compat/jansson
else
JANSSON_INCLUDES=
endif
# Extra files to ship in the distribution tarball.
EXTRA_DIST = example-cfg.json nomacro.pl
# Subdirectories with their own Makefile.am.
SUBDIRS = compat
# The single installed program and its man page.
bin_PROGRAMS = minerd
dist_man_MANS = minerd.1
# C sources and headers that are always part of minerd.
minerd_SOURCES = elist.h miner.h compat.h \
cpu-miner.c util.c \
sha2.c scrypt.c
# Assembly implementations of sha2/scrypt are appended per architecture,
# and only when USE_ASM is true.
if USE_ASM
if ARCH_x86
minerd_SOURCES += sha2-x86.S scrypt-x86.S
endif
if ARCH_x86_64
minerd_SOURCES += sha2-x64.S scrypt-x64.S
endif
if ARCH_ARM
minerd_SOURCES += sha2-arm.S scrypt-arm.S
endif
if ARCH_PPC
minerd_SOURCES += sha2-ppc.S scrypt-ppc.S
endif
endif
# Link and compile flags; the @...@ values are substituted by configure.
minerd_LDFLAGS = $(PTHREAD_FLAGS)
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@
minerd_CFLAGS = -fno-strict-aliasing
minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(JANSSON_INCLUDES) $(PTHREAD_FLAGS)
07070100000006000081A4000003E800000064000000015EF4BCA100002734000000000000000000000000000000000000001400000000cpuminer-2.5.1/NEWSVersion 2.5.1 - Jun 25, 2020
- Add support for bech32 addresses (BIP 173)
Version 2.5.0 - Jun 22, 2017
- Add Segwit support
- Add support for 64-bit PowerPC
Version 2.4.5 - Jun 10, 2016
- Fix a memory leak affecting long polling
Version 2.4.4 - Mar 24, 2016
- Fix memory leaks affecting getblocktemplate
Version 2.4.3 - Jan 24, 2016
- Add support for the VIA PadLock Hash Engine on x86-64
- Allow block version 4 when using getblocktemplate
Version 2.4.2 - Apr 28, 2015
- Add support for Stratum over TLS
- Allow block version 3 when using getblocktemplate
Version 2.4.1 - Feb 25, 2015
- Add support for scrypt(N, 1, 1)
- Add optimized PowerPC code for scrypt and SHA-256d
- Allow use of getblocktemplate with merge-mined cryptocurrencies
- Automatically switch to getwork if no payout address is provided
- Fix CVE-2014-6251
Version 2.4 - May 20, 2014
- Add support for the getblocktemplate RPC method (BIP 22)
- Allow tunnelling Stratum through HTTP proxies
- Add a --no-redirect option to ignore redirection requests
- Timeout for long polling is now disabled by default
- Fix CPU affinity on Linux (kiyominer)
- Add support for building under 64-bit Cygwin
- Expand version information with build details
Version 2.3.3 - Feb 27, 2014
- The --url option is now mandatory
- Do not switch to Stratum when using an HTTP proxy
- Fix scheduling policy change on Linux (clbr)
- Fix CPU affinity on FreeBSD (ache)
- Compatibility fixes for various platforms, including Solaris 8
and old versions of OS X
- A man page for minerd is now available
Version 2.3.2 - Jul 10, 2013
- Add optimizations for AVX2-capable x86-64 processors
- Ensure that the output stream is flushed after every log message
- Fix an undefined-behavior bug in the Stratum code
Version 2.3.1 - Jun 18, 2013
- Add a --cert option for specifying an SSL certificate (martinwguy)
- Fix a bug that only made SHA-256d mining work at difficulty 1
- Fix a couple of compatibility issues with some Stratum servers
Version 2.3 - Jun 12, 2013
- Add support for the Stratum mining protocol
- Automatically switch to Stratum if the mining server supports
the X-Stratum extension, unless --no-stratum is used
- Set CPU affinity on FreeBSD (lye)
- Fix a bug in libcurl initialization (martinwguy)
Version 2.2.3 - Aug 5, 2012
- Add optimized ARM NEON code for scrypt and SHA-256d
- Add a --benchmark option that allows offline testing
- Support for the X-Reject-Reason extension
Version 2.2.2 - Jun 7, 2012
- Various performance improvements for x86 and x86-64
- Optimize scrypt for ARMv5E and later processors
- Set the priority of miner threads to idle on Windows
- Add an option to start minerd as a daemon on POSIX systems
Version 2.2.1 - May 2, 2012
- Add optimized code for ARM processors
- Support for building on NetBSD and OpenBSD
- Various compatibility fixes for AIX (pontius)
Version 2.2 - Apr 2, 2012
- Add an optimized SHA-256d algorithm, with specialized code
for x86 and x86-64 and support for AVX and XOP instructions
- Slight performance increase for scrypt on x86 and x86-64
- The default timeout is now 270 seconds
Version 2.1.5 - Mar 7, 2012
- Add optimizations for AVX-capable x86-64 processors
- Assume HTTP if no protocol is specified for the mining server
- Fix MinGW compatibility issues and update build instructions
- Add support for building on Solaris using gcc (pontius)
Version 2.1.4 - Feb 28, 2012
- Implement 4-way SHA-256 on x86-64
- Add TCP keepalive to long polling connections
- Support HTTP and SOCKS proxies via the --proxy option
- Username and password are no longer mandatory
- Add a script that makes assembly code compatible with old versions
of the GNU assembler that do not support macros
Version 2.1.3 - Feb 12, 2012
- Smart handling of long polling failures: switch to short scan time
if long polling fails, and only try to reactivate it if the server
continues to advertise the feature in HTTP headers
- Add "X-Mining-Extensions: midstate" to HTTP headers (p2k)
- Add support for the "submitold" extension, used by p2pool
- It is now possible to specify username and password in the URL,
like this: http://username:password@host:port/
- Add a --version option, and clean up --help output
- Avoid division by zero when computing hash rates
- Handle empty responses properly (TimothyA)
- Eliminate the delay between starting threads
Version 2.1.2 - Jan 26, 2012
- Do not submit work that is known to be stale
- Allow miner threads to ask for new work if the current one is at least
45 seconds old and long polling is enabled
- Refresh work when long polling times out
- Fix minor speed regression
- Modify x86-64 code to make it compatible with older versions of binutils
Version 2.1.1 - Jan 20, 2012
- Handle network errors properly
- Make scantime retargeting more accurate
Version 2.1 - Jan 19, 2012
- Share the same work among all threads
- Do not ask for new work if the current one is not expired
- Do not discard the work returned by long polling
Version 2.0 - Jan 16, 2012
- Change default port to 9332 for Litecoin and remove default credentials
- Add 'scrypt' as the default algorithm and remove other algorithms (ArtForz)
- Optimize scrypt for x86 and x86-64
- Make scantime retargeting less granular (ArtForz)
- Test the whole hash instead of just looking at the high 32 bits
- Add configurable timeout, with a default of 180 seconds
- Add share summary output (inlikeflynn)
- Fix priority and CPU count detection on Windows
- Fix parameters -u and -p, and add short options -o and -O
Version 1.0.2 - Jun 13, 2011
- Linux x86_64 optimisations - Con Kolivas
- Optimise for x86_64 by default by using sse2_64 algo
- Detects CPUs and sets number of threads accordingly
- Uses CPU affinity for each thread where appropriate
- Sets scheduling policy to lowest possible
- Minor performance tweaks
Version 1.0.1 - May 14, 2011
- OSX support
Version 1.0 - May 9, 2011
- jansson 2.0 compatibility
- correct off-by-one in date (month) display output
- fix platform detection
- improve yasm configure bits
- support full URL, in X-Long-Polling header
Version 0.8.1 - March 22, 2011
- Make --user, --pass actually work
- Add User-Agent HTTP header to requests, so that server operators may
more easily identify the miner client.
- Fix minor bug in example JSON config file
Version 0.8 - March 21, 2011
- Support long polling: http://deepbit.net/longpolling.php
- Adjust max workload based on scantime (default 5 seconds,
or 60 seconds for longpoll)
- Standardize program output, and support syslog on Unix platforms
- Support --user/--pass options (and "user" and "pass" in config file),
as an alternative to the current --userpass
Version 0.7.2 - March 14, 2011
- Add port of ufasoft's sse2 assembly implementation (Linux only)
This is a substantial speed improvement on Intel CPUs.
- Move all JSON-RPC I/O to separate thread. This reduces the
number of HTTP connections from one-per-thread to one, reducing resource
usage on upstream bitcoind / pool server.
Version 0.7.1 - March 2, 2011
- Add support for JSON-format configuration file. See example
file example-cfg.json. Any long argument on the command line
may be stored in the config file.
- Timestamp each solution found
- Improve sha256_4way performance. NOTE: This optimization makes
the 'hash' debug-print output for sha256_4way incorrect.
- Use __builtin_expect() intrinsic as compiler micro-optimization
- Build on Intel compiler
- HTTP library now follows HTTP redirects
Version 0.7 - February 12, 2011
- Re-use CURL object, thereby reusing DNS cache and HTTP connections
- Use bswap_32, if compiler intrinsic is not available
- Disable full target validation (as opposed to simply H==0) for now
Version 0.6.1 - February 4, 2011
- Fully validate "hash < target", rather than simply stopping our scan
if the high 32 bits are 00000000.
- Add --retry-pause, to set length of pause time between failure retries
- Display proof-of-work hash and target, if -D (debug mode) enabled
- Fix max-nonce auto-adjustment to actually work. This means if your
scan takes longer than 5 seconds (--scantime), the miner will slowly
reduce the number of hashes you work on, before fetching a new work unit.
Version 0.6 - January 29, 2011
- Fetch new work unit, if scanhash takes longer than 5 seconds (--scantime)
- BeeCee1's sha256 4way optimizations
- lfm's byte swap optimization (improves via, cryptopp)
- Fix non-working short options -q, -r
Version 0.5 - December 28, 2010
- Exit program, when all threads have exited
- Improve JSON-RPC failure diagnostics and resilience
- Add --quiet option, to disable hashmeter output.
Version 0.3.3 - December 27, 2010
- Critical fix for sha256_cryptopp 'cryptopp_asm' algo
Version 0.3.2 - December 23, 2010
- Critical fix for sha256_via
Version 0.3.1 - December 19, 2010
- Critical fix for sha256_via
- Retry JSON-RPC failures (see --retry, under "minerd --help" output)
Version 0.3 - December 18, 2010
- Add crypto++ 32bit assembly implementation
- show version upon 'minerd --help'
- work around gcc 4.5.x bug that killed 4way performance
Version 0.2.2 - December 6, 2010
- VIA padlock implementation works now
- Minor build and runtime fixes
Version 0.2.1 - November 29, 2010
- avoid buffer overflow when submitting solutions
- add Crypto++ sha256 implementation (C only, ASM elided for now)
- minor internal optimizations and cleanups
Version 0.2 - November 27, 2010
- Add script for building a Windows installer
- improve hash performance (hashmeter) statistics
- add tcatm 4way sha256 implementation
- Add experimental VIA Padlock sha256 implementation
Version 0.1.2 - November 26, 2010
- many small cleanups and micro-optimizations
- build win32 exe using mingw
- RPC URL, username/password become command line arguments
- remove unused OpenSSL dependency
Version 0.1.1 - November 24, 2010
- Do not build sha256_generic module separately from cpuminer.
Version 0.1 - November 24, 2010
- Initial release.
07070100000007000081A4000003E800000064000000015EF4BCA100000BED000000000000000000000000000000000000001600000000cpuminer-2.5.1/READMEThis is a multi-threaded CPU miner for Litecoin and Bitcoin,
fork of Jeff Garzik's reference cpuminer.
License: GPLv2. See COPYING for details.
Downloads: https://sourceforge.net/projects/cpuminer/files/
Git tree: https://github.com/pooler/cpuminer
Dependencies:
libcurl http://curl.haxx.se/libcurl/
jansson http://www.digip.org/jansson/
(jansson is included in-tree)
Basic *nix build instructions:
./autogen.sh # only needed if building from git repo
./nomacro.pl # in case the assembler doesn't support macros
./configure CFLAGS="-O3" # make sure -O3 is an O and not a zero!
make
Notes for AIX users:
* To build a 64-bit binary, export OBJECT_MODE=64
* GNU-style long options are not supported, but are accessible
via configuration file
Basic Windows build instructions, using MinGW:
Install MinGW and the MSYS Developer Tool Kit (http://www.mingw.org/)
* Make sure you have mstcpip.h in MinGW\include
If using MinGW-w64, install pthreads-w64
Install libcurl devel (http://curl.haxx.se/download.html)
* Make sure you have libcurl.m4 in MinGW\share\aclocal
* Make sure you have curl-config in MinGW\bin
In the MSYS shell, run:
./autogen.sh # only needed if building from git repo
LIBCURL="-lcurldll" ./configure CFLAGS="-O3"
make
Architecture-specific notes:
ARM: No runtime CPU detection. The miner can take advantage
of some instructions specific to ARMv5E and later processors,
but the decision whether to use them is made at compile time,
based on compiler-defined macros.
To use NEON instructions, add "-mfpu=neon" to CFLAGS.
PowerPC: No runtime CPU detection.
To use AltiVec instructions, add "-maltivec" to CFLAGS.
x86: The miner checks for SSE2 instructions support at runtime,
and uses them if they are available.
x86-64: The miner can take advantage of AVX, AVX2 and XOP instructions,
but only if both the CPU and the operating system support them.
* Linux supports AVX starting from kernel version 2.6.30.
* FreeBSD supports AVX starting with 9.1-RELEASE.
* Mac OS X added AVX support in the 10.6.8 update.
* Windows supports AVX starting from Windows 7 SP1 and
Windows Server 2008 R2 SP1.
The configure script outputs a warning if the assembler
doesn't support some instruction sets. In that case, the miner
can still be built, but unavailable optimizations are left off.
The miner uses the VIA Padlock Hash Engine where available.
Usage instructions: Run "minerd --help" to see options.
Connecting through a proxy: Use the --proxy option.
To use a SOCKS proxy, add a socks4:// or socks5:// prefix to the proxy host.
Protocols socks4a and socks5h, allowing remote name resolving, are also
available since libcurl 7.18.0.
If no protocol is specified, the proxy is assumed to be an HTTP proxy.
When the --proxy option is not used, the program honors the http_proxy
and all_proxy environment variables.
Also many issues and FAQs are covered in the forum thread
dedicated to this program,
https://bitcointalk.org/index.php?topic=55038.0
07070100000008000081ED000003E800000064000000015EF4BCA1000000BA000000000000000000000000000000000000001A00000000cpuminer-2.5.1/autogen.sh#!/bin/sh
# You need autoconf 2.5x, preferably 2.57 or later
# You need automake 1.7 or later. 1.6 might work.
# Stop at the first command that fails instead of continuing with a
# half-generated build system.
set -e
# Run the autotools steps in order.
aclocal
autoheader
# --add-missing --copy: install any missing auxiliary files as copies.
automake --gnu --add-missing --copy
autoconf
07070100000009000041ED000003E800000064000000015EF4BCA100000000000000000000000000000000000000000000001600000000cpuminer-2.5.1/compat0707010000000A000081A4000003E800000064000000015EF4BCA10000014E000000000000000000000000000000000000001800000000cpuminer-2.5.1/compat.h#ifndef __COMPAT_H__
#define __COMPAT_H__
#ifdef WIN32
#include <windows.h>
/* Map sleep(), called with a number of seconds, onto Sleep() by scaling
   the argument by 1000. */
#define sleep(secs) Sleep((secs) * 1000)
/* Stand-in constant so that callers passing PRIO_PROCESS to setpriority()
   compile in the WIN32 build. */
enum {
PRIO_PROCESS = 0,
};
/* WIN32 replacement for setpriority(): all three arguments are ignored and
   the calling thread is switched to THREAD_PRIORITY_IDLE.
   Returns 0 when SetThreadPriority() reports success, -1 otherwise. */
static inline int setpriority(int which, int who, int prio)
{
    if (SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_IDLE))
        return 0;
    return -1;
}
#endif /* WIN32 */
#endif /* __COMPAT_H__ */
0707010000000B000081A4000003E800000064000000015EF4BCA100000039000000000000000000000000000000000000002200000000cpuminer-2.5.1/compat/Makefile.am
if WANT_JANSSON
SUBDIRS = jansson
else
SUBDIRS =
endif
0707010000000C000041ED000003E800000064000000015EF4BCA100000000000000000000000000000000000000000000001E00000000cpuminer-2.5.1/compat/jansson0707010000000D000081A4000003E800000064000000015EF4BCA10000043A000000000000000000000000000000000000002600000000cpuminer-2.5.1/compat/jansson/LICENSECopyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
0707010000000E000081A4000003E800000064000000015EF4BCA100000120000000000000000000000000000000000000002A00000000cpuminer-2.5.1/compat/jansson/Makefile.am
noinst_LIBRARIES = libjansson.a
libjansson_a_SOURCES = \
config.h \
dump.c \
hashtable.c \
hashtable.h \
jansson.h \
jansson_private.h \
load.c \
strbuffer.c \
strbuffer.h \
utf.c \
utf.h \
util.h \
value.c
0707010000000F000081A4000003E800000064000000015EF4BCA100000874000000000000000000000000000000000000002700000000cpuminer-2.5.1/compat/jansson/config.h/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#define LT_OBJDIR ".libs/"
/* Name of package */
#define PACKAGE "jansson"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "petri@digip.org"
/* Define to the full name of this package. */
#define PACKAGE_NAME "jansson"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "jansson 1.3"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "jansson"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.3"
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Version number of package */
#define VERSION "1.3"
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
/* #undef inline */
#endif
/* Define to the type of a signed integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
/* #undef int32_t */
07070100000010000081A4000003E800000064000000015EF4BCA100003171000000000000000000000000000000000000002500000000cpuminer-2.5.1/compat/jansson/dump.c/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <jansson.h>
#include "jansson_private.h"
#include "strbuffer.h"
#include "utf.h"
#define MAX_INTEGER_STR_LENGTH 100
#define MAX_REAL_STR_LENGTH 100
/* Output callback used by the encoder: receives `size` bytes starting at
   `buffer` together with the caller-supplied `data` pointer.  The callers
   in this file treat any non-zero return value as a failure. */
typedef int (*dump_func)(const char *buffer, int size, void *data);
/* NOTE(review): no other code in dump.c refers to this struct -- it looks
   unused; confirm before removing. */
struct string
{
char *buffer;
int length;
int size;
};
static int dump_to_strbuffer(const char *buffer, int size, void *data)
{
return strbuffer_append_bytes((strbuffer_t *)data, buffer, size);
}
/* dump_func adapter: `data` is the FILE * that receives the output.
   The bytes are written as a single item; returns 0 when that item was
   written completely and -1 otherwise. */
static int dump_to_file(const char *buffer, int size, void *data)
{
    FILE *stream = (FILE *)data;

    return fwrite(buffer, size, 1, stream) == 1 ? 0 : -1;
}
/* Largest indentation width JSON_INDENT() can yield (low 8 bits of flags). */
#define DUMP_MAX_INDENT_WIDTH 0xFF

/*
 * Emit the whitespace that goes between two tokens.
 *
 * flags: encoder flags; JSON_INDENT(flags) is the per-level indent width.
 * depth: nesting level of the token that follows.
 * space: non-zero if a single space is wanted when indentation is off.
 * dump/data: output callback and its context pointer.
 *
 * With indentation enabled a newline is written, followed by `depth`
 * runs of JSON_INDENT(flags) spaces.  Otherwise one space is written when
 * `space` is set and JSON_COMPACT is not.  Returns 0 on success and
 * non-zero if the output callback fails.
 *
 * The padding is built in a local buffer sized for the maximum width so
 * that the write can never read past the end of the source array.
 */
static int dump_indent(unsigned long flags, int depth, int space, dump_func dump, void *data)
{
    int ws_count = JSON_INDENT(flags);

    if(ws_count > 0)
    {
        char padding[DUMP_MAX_INDENT_WIDTH];
        int i;

        memset(padding, ' ', (size_t)ws_count);

        if(dump("\n", 1, data))
            return -1;

        for(i = 0; i < depth; i++)
        {
            if(dump(padding, ws_count, data))
                return -1;
        }
    }
    else if(space && !(flags & JSON_COMPACT))
    {
        return dump(" ", 1, data);
    }
    return 0;
}
/*
 * Write `str` as a double-quoted JSON string through `dump`.
 *
 * str:   NUL-terminated input text.
 * ascii: when non-zero, code points above 0x7F are written as \u escapes.
 * dump/data: output callback and its context pointer.
 *
 * Backslash, double quote and code points below 0x20 are always escaped.
 * Returns the result of the final dump() call on success, or -1 when the
 * output callback fails or utf8_iterate() returns NULL.
 */
static int dump_string(const char *str, int ascii, dump_func dump, void *data)
{
const char *pos, *end;
int32_t codepoint;
if(dump("\"", 1, data))
return -1;
/* str marks the start of the pending unescaped run, pos its end, and
   end the position just after the most recently examined code point. */
end = pos = str;
while(1)
{
const char *text;
/* room for two "\uXXXX" escapes (12 characters) plus the terminator */
char seq[13];
int length;
/* Advance over characters that can be copied verbatim. */
while(*end)
{
/* presumably decodes one code point at pos and returns the position
   after it -- see utf.c to confirm */
end = utf8_iterate(pos, &codepoint);
if(!end)
return -1;
/* mandatory escape or control char */
if(codepoint == '\\' || codepoint == '"' || codepoint < 0x20)
break;
/* non-ASCII */
if(ascii && codepoint > 0x7F)
break;
pos = end;
}
/* Flush the verbatim run collected so far. */
if(pos != str) {
if(dump(str, pos - str, data))
return -1;
}
/* Nothing left to escape: the whole input has been consumed. */
if(end == pos)
break;
/* handle \, ", and control codes */
length = 2;
switch(codepoint)
{
case '\\': text = "\\\\"; break;
case '\"': text = "\\\""; break;
case '\b': text = "\\b"; break;
case '\f': text = "\\f"; break;
case '\n': text = "\\n"; break;
case '\r': text = "\\r"; break;
case '\t': text = "\\t"; break;
default:
{
/* codepoint is in BMP */
if(codepoint < 0x10000)
{
sprintf(seq, "\\u%04x", codepoint);
length = 6;
}
/* not in BMP -> construct a UTF-16 surrogate pair */
else
{
int32_t first, last;
codepoint -= 0x10000;
first = 0xD800 | ((codepoint & 0xffc00) >> 10);
last = 0xDC00 | (codepoint & 0x003ff);
sprintf(seq, "\\u%04x\\u%04x", first, last);
length = 12;
}
text = seq;
break;
}
}
if(dump(text, length, data))
return -1;
/* Restart the verbatim run right after the escaped code point. */
str = pos = end;
}
return dump("\"", 1, data);
}
static int object_key_compare_keys(const void *key1, const void *key2)
{
return strcmp((*(const object_key_t **)key1)->key,
(*(const object_key_t **)key2)->key);
}
/* qsort() comparator over an array of object_key_t pointers: orders the
   entries by ascending serial number.

   The serials are unsigned long, so they are compared explicitly rather
   than subtracted: a difference squeezed into an int can come out with
   the wrong sign, or as zero for two different serials.
   Returns -1, 0 or 1. */
static int object_key_compare_serials(const void *key1, const void *key2)
{
    unsigned long lhs = (*(const object_key_t **)key1)->serial;
    unsigned long rhs = (*(const object_key_t **)key2)->serial;

    return (lhs > rhs) - (lhs < rhs);
}
/*
 * Recursively encode `json` through the `dump` callback.
 *
 * json:  value to encode.
 * flags: encoder flags (JSON_INDENT width, JSON_COMPACT, JSON_ENSURE_ASCII,
 *        JSON_SORT_KEYS, JSON_PRESERVE_ORDER).
 * depth: current nesting level, used for indentation.
 * dump/data: output callback and its context pointer.
 *
 * Returns 0 on success and non-zero on failure: an output error, a number
 * that does not fit its scratch buffer, a string or object key that
 * dump_string() rejects, an allocation failure, or a container that is
 * already being visited (circular reference).
 *
 * The `visited` flag of an array/object is set while its contents are
 * written and cleared again on every exit path.
 */
static int do_dump(const json_t *json, unsigned long flags, int depth,
                   dump_func dump, void *data)
{
    int ascii = flags & JSON_ENSURE_ASCII ? 1 : 0;

    switch(json_typeof(json)) {
        case JSON_NULL:
            return dump("null", 4, data);

        case JSON_TRUE:
            return dump("true", 4, data);

        case JSON_FALSE:
            return dump("false", 5, data);

        case JSON_INTEGER:
        {
            char buffer[MAX_INTEGER_STR_LENGTH];
            int size;

            size = snprintf(buffer, MAX_INTEGER_STR_LENGTH, "%d", json_integer_value(json));
            /* snprintf reports an encoding error with a negative value */
            if(size < 0 || size >= MAX_INTEGER_STR_LENGTH)
                return -1;

            return dump(buffer, size, data);
        }

        case JSON_REAL:
        {
            char buffer[MAX_REAL_STR_LENGTH];
            int size;

            size = snprintf(buffer, MAX_REAL_STR_LENGTH, "%.17g",
                            json_real_value(json));
            if(size < 0 || size >= MAX_REAL_STR_LENGTH)
                return -1;

            /* Make sure there's a dot or 'e' in the output. Otherwise
               a real is converted to an integer when decoding */
            if(strchr(buffer, '.') == NULL &&
               strchr(buffer, 'e') == NULL)
            {
                if(size + 2 >= MAX_REAL_STR_LENGTH) {
                    /* No space to append ".0" */
                    return -1;
                }
                buffer[size] = '.';
                buffer[size + 1] = '0';
                size += 2;
            }

            return dump(buffer, size, data);
        }

        case JSON_STRING:
            return dump_string(json_string_value(json), ascii, dump, data);

        case JSON_ARRAY:
        {
            int i;
            int n;
            json_array_t *array;

            /* detect circular references */
            array = json_to_array(json);
            if(array->visited)
                goto array_error;
            array->visited = 1;

            n = json_array_size(json);

            if(dump("[", 1, data))
                goto array_error;
            if(n == 0) {
                array->visited = 0;
                return dump("]", 1, data);
            }
            if(dump_indent(flags, depth + 1, 0, dump, data))
                goto array_error;

            for(i = 0; i < n; ++i) {
                if(do_dump(json_array_get(json, i), flags, depth + 1,
                           dump, data))
                    goto array_error;

                if(i < n - 1)
                {
                    if(dump(",", 1, data) ||
                       dump_indent(flags, depth + 1, 1, dump, data))
                        goto array_error;
                }
                else
                {
                    if(dump_indent(flags, depth, 0, dump, data))
                        goto array_error;
                }
            }

            array->visited = 0;
            return dump("]", 1, data);

        array_error:
            array->visited = 0;
            return -1;
        }

        case JSON_OBJECT:
        {
            json_object_t *object;
            void *iter;
            const char *separator;
            int separator_length;

            if(flags & JSON_COMPACT) {
                separator = ":";
                separator_length = 1;
            }
            else {
                separator = ": ";
                separator_length = 2;
            }

            /* detect circular references */
            object = json_to_object(json);
            if(object->visited)
                goto object_error;
            object->visited = 1;

            iter = json_object_iter((json_t *)json);

            if(dump("{", 1, data))
                goto object_error;
            if(!iter) {
                object->visited = 0;
                return dump("}", 1, data);
            }
            if(dump_indent(flags, depth + 1, 0, dump, data))
                goto object_error;

            if(flags & JSON_SORT_KEYS || flags & JSON_PRESERVE_ORDER)
            {
                const object_key_t **keys;
                unsigned int size;
                unsigned int i;
                int (*cmp_func)(const void *, const void *);

                size = json_object_size(json);
                keys = malloc(size * sizeof(object_key_t *));
                if(!keys)
                    goto object_error;

                /* collect every key, then order them as requested */
                i = 0;
                while(iter)
                {
                    keys[i] = jsonp_object_iter_fullkey(iter);
                    iter = json_object_iter_next((json_t *)json, iter);
                    i++;
                }
                assert(i == size);

                if(flags & JSON_SORT_KEYS)
                    cmp_func = object_key_compare_keys;
                else
                    cmp_func = object_key_compare_serials;

                qsort(keys, size, sizeof(object_key_t *), cmp_func);

                for(i = 0; i < size; i++)
                {
                    const char *key;
                    json_t *value;

                    key = keys[i]->key;
                    value = json_object_get(json, key);
                    assert(value);

                    /* a failure while writing the key aborts the dump just
                       like a failure while writing the value */
                    if(dump_string(key, ascii, dump, data) ||
                       dump(separator, separator_length, data) ||
                       do_dump(value, flags, depth + 1, dump, data))
                    {
                        free(keys);
                        goto object_error;
                    }

                    if(i < size - 1)
                    {
                        if(dump(",", 1, data) ||
                           dump_indent(flags, depth + 1, 1, dump, data))
                        {
                            free(keys);
                            goto object_error;
                        }
                    }
                    else
                    {
                        if(dump_indent(flags, depth, 0, dump, data))
                        {
                            free(keys);
                            goto object_error;
                        }
                    }
                }

                free(keys);
            }
            else
            {
                /* Don't sort keys */

                while(iter)
                {
                    void *next = json_object_iter_next((json_t *)json, iter);

                    if(dump_string(json_object_iter_key(iter), ascii, dump, data) ||
                       dump(separator, separator_length, data) ||
                       do_dump(json_object_iter_value(iter), flags, depth + 1,
                               dump, data))
                        goto object_error;

                    if(next)
                    {
                        if(dump(",", 1, data) ||
                           dump_indent(flags, depth + 1, 1, dump, data))
                            goto object_error;
                    }
                    else
                    {
                        if(dump_indent(flags, depth, 0, dump, data))
                            goto object_error;
                    }

                    iter = next;
                }
            }

            object->visited = 0;
            return dump("}", 1, data);

        object_error:
            object->visited = 0;
            return -1;
        }

        default:
            /* not reached */
            return -1;
    }
}
/* Encode an array or object into a newly allocated string.
   Returns the strdup()'ed text, or NULL when `json` is neither an array
   nor an object, when the work buffer cannot be initialised, or when
   encoding fails. */
char *json_dumps(const json_t *json, unsigned long flags)
{
    strbuffer_t strbuff;
    char *result = NULL;

    if(!(json_is_array(json) || json_is_object(json)))
        return NULL;

    if(strbuffer_init(&strbuff))
        return NULL;

    /* only copy the text out when the whole value was encoded */
    if(!do_dump(json, flags, 0, dump_to_strbuffer, (void *)&strbuff))
        result = strdup(strbuffer_value(&strbuff));

    strbuffer_close(&strbuff);
    return result;
}
/* Encode an array or object to an open stream.
   Returns -1 when `json` is neither an array nor an object, otherwise the
   status of do_dump(). */
int json_dumpf(const json_t *json, FILE *output, unsigned long flags)
{
    int is_container = json_is_array(json) || json_is_object(json);

    if(!is_container)
        return -1;

    return do_dump(json, flags, 0, dump_to_file, (void *)output);
}
/* Encode an array or object into the file at `path`, which is opened
   with mode "w".
   Returns -1 when the file cannot be opened or when closing it fails
   (buffered data may only hit an error when it is flushed at close time);
   otherwise returns the status of json_dumpf(). */
int json_dump_file(const json_t *json, const char *path, unsigned long flags)
{
    int result;
    FILE *output = fopen(path, "w");

    if(!output)
        return -1;

    result = json_dumpf(json, output, flags);

    /* always close the stream, and do not report success if that fails */
    if(fclose(output) != 0)
        result = -1;

    return result;
}
07070100000011000081A4000003E800000064000000015EF4BCA10000233A000000000000000000000000000000000000002A00000000cpuminer-2.5.1/compat/jansson/hashtable.c/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#include <config.h>
#include <stdlib.h>
#include "hashtable.h"
typedef struct hashtable_list list_t;
typedef struct hashtable_pair pair_t;
typedef struct hashtable_bucket bucket_t;
/* Recover a pointer to the enclosing type_ object from a pointer to its
   member_ field.  ptr_ is parenthesized so that any pointer expression
   (not only a plain identifier) can be passed. */
#define container_of(ptr_, type_, member_) \
    ((type_ *)((char *)(ptr_) - (size_t)&((type_ *)0)->member_))
/* Map a list node back to the pair_t that embeds it as its `list` field. */
#define list_to_pair(list_) container_of(list_, pair_t, list)
/* Turn `list` into an empty circular list: both links point at itself. */
static inline void list_init(list_t *list)
{
    list->prev = list->next = list;
}
/* Link `node` into the circular list immediately before `list`. */
static inline void list_insert(list_t *list, list_t *node)
{
    list_t *before = list->prev;

    node->next = list;
    node->prev = before;
    before->next = node;
    list->prev = node;
}
/* Unlink `list` by joining its two neighbours; the links stored in the
   removed node itself are left as they were. */
static inline void list_remove(list_t *list)
{
    list_t *before = list->prev;
    list_t *after = list->next;

    before->next = after;
    after->prev = before;
}
/* A bucket is empty when both of its ends point at the table's list head. */
static inline int bucket_is_empty(hashtable_t *hashtable, bucket_t *bucket)
{
    return bucket->first == bucket->last && bucket->first == &hashtable->list;
}
/* Add node `list` to `bucket`.  The node becomes the bucket's new first
   entry; for a previously empty bucket it is linked in before the table's
   list head and becomes both first and last. */
static void insert_to_bucket(hashtable_t *hashtable, bucket_t *bucket,
                             list_t *list)
{
    if(!bucket_is_empty(hashtable, bucket))
    {
        list_insert(bucket->first, list);
        bucket->first = list;
        return;
    }

    list_insert(&hashtable->list, list);
    bucket->first = bucket->last = list;
}
/* Bucket counts available to a table, smallest first.  hashtable_t's
   num_buckets field is an index into this table.  Read-only data, hence
   const. */
static const unsigned int primes[] = {
    5, 13, 23, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593,
    49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
    12582917, 25165843, 50331653, 100663319, 201326611, 402653189,
    805306457, 1610612741
};
/* Number of entries in primes[]. */
static const unsigned int num_primes = sizeof(primes) / sizeof(primes[0]);
/* Current bucket count: the primes[] entry selected by the table's
   num_buckets index. */
static inline unsigned int num_buckets(hashtable_t *hashtable)
{
    unsigned int prime_index = hashtable->num_buckets;

    return primes[prime_index];
}
/* Search `bucket` for an entry whose stored hash equals `hash` and whose
   key is accepted by cmp_keys().  Returns the pair, or NULL when the
   bucket is empty or holds no match. */
static pair_t *hashtable_find_pair(hashtable_t *hashtable, bucket_t *bucket,
                                   const void *key, unsigned int hash)
{
    list_t *node;

    if(bucket_is_empty(hashtable, bucket))
        return NULL;

    /* walk from the bucket's first node up to and including its last */
    for(node = bucket->first; ; node = node->next)
    {
        pair_t *candidate = list_to_pair(node);

        if(candidate->hash == hash &&
           hashtable->cmp_keys(candidate->key, key))
            return candidate;

        if(node == bucket->last)
            return NULL;
    }
}
/* Remove the entry for `key` (whose hash is `hash`) from the table.
   The bucket's first/last pointers are repaired before the node is
   unlinked, then the key and value are released through the free
   callbacks (when set) and the pair itself is freed.
   returns 0 on success, -1 if key was not found */
static int hashtable_do_del(hashtable_t *hashtable,
const void *key, unsigned int hash)
{
pair_t *pair;
bucket_t *bucket;
unsigned int index;
index = hash % num_buckets(hashtable);
bucket = &hashtable->buckets[index];
pair = hashtable_find_pair(hashtable, bucket, key, hash);
if(!pair)
return -1;
/* only entry in the bucket: mark the bucket empty */
if(&pair->list == bucket->first && &pair->list == bucket->last)
bucket->first = bucket->last = &hashtable->list;
/* removing one end of the bucket: move that end inwards */
else if(&pair->list == bucket->first)
bucket->first = pair->list.next;
else if(&pair->list == bucket->last)
bucket->last = pair->list.prev;
list_remove(&pair->list);
if(hashtable->free_key)
hashtable->free_key(pair->key);
if(hashtable->free_value)
hashtable->free_value(pair->value);
free(pair);
hashtable->size--;
return 0;
}
/* Free every pair in the table, running the key/value free callbacks
   where they are set.  The bucket array, the list head and the size
   counter are not touched here. */
static void hashtable_do_clear(hashtable_t *hashtable)
{
    list_t *node = hashtable->list.next;

    while(node != &hashtable->list)
    {
        /* remember the successor before the node's storage goes away */
        list_t *following = node->next;
        pair_t *entry = list_to_pair(node);

        if(hashtable->free_key)
            hashtable->free_key(entry->key);
        if(hashtable->free_value)
            hashtable->free_value(entry->value);
        free(entry);

        node = following;
    }
}
/*
 * Grow the table to the next size in primes[] and redistribute all pairs.
 *
 * The new bucket array is allocated before the old one is released, so a
 * failed allocation leaves the table exactly as it was (and still usable).
 * When the table already uses the largest size in primes[] nothing is
 * changed and 0 is returned; the table simply keeps its current buckets.
 *
 * Returns 0 on success, -1 if the new bucket array cannot be allocated.
 */
static int hashtable_do_rehash(hashtable_t *hashtable)
{
    list_t *list, *next;
    pair_t *pair;
    bucket_t *new_buckets;
    unsigned int i, index, new_order, new_size;

    new_order = hashtable->num_buckets + 1;
    if(new_order >= num_primes)
        return 0;

    new_size = primes[new_order];
    /* refuse sizes whose byte count does not fit in size_t */
    if(new_size > (size_t)-1 / sizeof(bucket_t))
        return -1;

    new_buckets = malloc(new_size * sizeof(bucket_t));
    if(!new_buckets)
        return -1;

    free(hashtable->buckets);
    hashtable->buckets = new_buckets;
    hashtable->num_buckets = new_order;

    for(i = 0; i < new_size; i++)
    {
        hashtable->buckets[i].first = hashtable->buckets[i].last =
            &hashtable->list;
    }

    /* detach the chain of pairs, reset the head, then re-insert each pair
       into its new bucket */
    list = hashtable->list.next;
    list_init(&hashtable->list);

    for(; list != &hashtable->list; list = next) {
        next = list->next;
        pair = list_to_pair(list);
        index = pair->hash % new_size;
        insert_to_bucket(hashtable, &hashtable->buckets[index], &pair->list);
    }

    return 0;
}
/* Allocate a hashtable_t and initialise it with hashtable_init().
   Returns the new table, or NULL if either the allocation or the
   initialisation fails. */
hashtable_t *hashtable_create(key_hash_fn hash_key, key_cmp_fn cmp_keys,
                              free_fn free_key, free_fn free_value)
{
    hashtable_t *hashtable = malloc(sizeof(hashtable_t));

    if(hashtable &&
       hashtable_init(hashtable, hash_key, cmp_keys, free_key, free_value) != 0)
    {
        free(hashtable);
        hashtable = NULL;
    }

    return hashtable;
}
/* Counterpart of hashtable_create(): release the table's contents with
   hashtable_close(), then free the table object itself. */
void hashtable_destroy(hashtable_t *hashtable)
{
    hashtable_close(hashtable);   /* pairs and bucket array */
    free(hashtable);              /* the hashtable_t storage */
}
/* Initialise a caller-provided hashtable_t as an empty table using the
   first size in primes[], and record the four callbacks.
   Returns 0 on success, -1 if the bucket array cannot be allocated. */
int hashtable_init(hashtable_t *hashtable,
                   key_hash_fn hash_key, key_cmp_fn cmp_keys,
                   free_fn free_key, free_fn free_value)
{
    unsigned int i, count;

    hashtable->size = 0;
    hashtable->num_buckets = 0;  /* index to primes[] */

    count = num_buckets(hashtable);
    hashtable->buckets = malloc(count * sizeof(bucket_t));
    if(hashtable->buckets == NULL)
        return -1;

    list_init(&hashtable->list);

    hashtable->hash_key = hash_key;
    hashtable->cmp_keys = cmp_keys;
    hashtable->free_key = free_key;
    hashtable->free_value = free_value;

    /* every bucket starts out empty, i.e. pointing at the list head */
    for(i = 0; i < count; i++)
    {
        bucket_t *slot = &hashtable->buckets[i];

        slot->first = &hashtable->list;
        slot->last = &hashtable->list;
    }

    return 0;
}
/* Counterpart of hashtable_init(): free all pairs, then the bucket
   array.  The hashtable_t object itself is not freed. */
void hashtable_close(hashtable_t *hashtable)
{
    hashtable_do_clear(hashtable);   /* every stored pair */
    free(hashtable->buckets);        /* the bucket array */
}
/* Store `value` under `key`.
   If the key is already present, the passed-in key and the old value are
   released through the free callbacks (when set) and the value is
   replaced; otherwise a new pair is allocated and linked into its bucket.
   Returns 0 on success, -1 if growing the table or allocating the pair
   fails. */
int hashtable_set(hashtable_t *hashtable, void *key, void *value)
{
    pair_t *pair;
    bucket_t *bucket;
    unsigned int hash;

    /* rehash if the load ratio exceeds 1 */
    if(hashtable->size >= num_buckets(hashtable) &&
       hashtable_do_rehash(hashtable))
        return -1;

    hash = hashtable->hash_key(key);
    bucket = hashtable->buckets + (hash % num_buckets(hashtable));
    pair = hashtable_find_pair(hashtable, bucket, key, hash);

    if(!pair)
    {
        pair = malloc(sizeof(pair_t));
        if(!pair)
            return -1;

        pair->key = key;
        pair->value = value;
        pair->hash = hash;
        list_init(&pair->list);

        insert_to_bucket(hashtable, bucket, &pair->list);
        hashtable->size++;
        return 0;
    }

    /* existing entry: the stored key stays, so the new key is dropped */
    if(hashtable->free_key)
        hashtable->free_key(key);
    if(hashtable->free_value)
        hashtable->free_value(pair->value);
    pair->value = value;

    return 0;
}
/* Look up `key`; returns the stored value, or NULL if there is no entry. */
void *hashtable_get(hashtable_t *hashtable, const void *key)
{
    unsigned int hash = hashtable->hash_key(key);
    bucket_t *bucket = hashtable->buckets + (hash % num_buckets(hashtable));
    pair_t *pair = hashtable_find_pair(hashtable, bucket, key, hash);

    return pair ? pair->value : NULL;
}
/* Hash `key` and remove its entry.  Returns the status of
   hashtable_do_del(): 0 on success, -1 if the key was not found. */
int hashtable_del(hashtable_t *hashtable, const void *key)
{
    return hashtable_do_del(hashtable, key, hashtable->hash_key(key));
}
/* Remove all entries but keep the table usable: the pairs are freed,
   every bucket is marked empty, the list head is reset and the size
   counter returns to zero.  The bucket array keeps its current size. */
void hashtable_clear(hashtable_t *hashtable)
{
    unsigned int i, count;

    hashtable_do_clear(hashtable);

    count = num_buckets(hashtable);
    for(i = 0; i < count; i++)
    {
        bucket_t *slot = &hashtable->buckets[i];

        slot->first = &hashtable->list;
        slot->last = &hashtable->list;
    }

    list_init(&hashtable->list);
    hashtable->size = 0;
}
/* Iterator for the first entry: the node following the list head, or
   NULL when the table holds no entries. */
void *hashtable_iter(hashtable_t *hashtable)
{
    list_t *head = &hashtable->list;

    return hashtable_iter_next(hashtable, head);
}
/* Iterator positioned at the entry for `key`, or NULL if the key is not
   in the table. */
void *hashtable_iter_at(hashtable_t *hashtable, const void *key)
{
    unsigned int hash = hashtable->hash_key(key);
    bucket_t *bucket = hashtable->buckets + (hash % num_buckets(hashtable));
    pair_t *pair = hashtable_find_pair(hashtable, bucket, key, hash);

    return pair ? &pair->list : NULL;
}
/* Advance an iterator.  Returns the following node, or NULL once the
   next node would be the list head (end of iteration). */
void *hashtable_iter_next(hashtable_t *hashtable, void *iter)
{
    list_t *following = ((list_t *)iter)->next;

    return following != &hashtable->list ? following : NULL;
}
void *hashtable_iter_key(void *iter)
{
pair_t *pair = list_to_pair((list_t *)iter);
return pair->key;
}
void *hashtable_iter_value(void *iter)
{
pair_t *pair = list_to_pair((list_t *)iter);
return pair->value;
}
/* Replace the value of the entry the iterator points at.  The previous
   value is released through free_value when that callback is set. */
void hashtable_iter_set(hashtable_t *hashtable, void *iter, void *value)
{
    list_t *node = (list_t *)iter;
    pair_t *entry = list_to_pair(node);

    if(hashtable->free_value)
        hashtable->free_value(entry->value);

    entry->value = value;
}
07070100000012000081A4000003E800000064000000015EF4BCA100001777000000000000000000000000000000000000002A00000000cpuminer-2.5.1/compat/jansson/hashtable.h/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#ifndef HASHTABLE_H
#define HASHTABLE_H
typedef unsigned int (*key_hash_fn)(const void *key);
typedef int (*key_cmp_fn)(const void *key1, const void *key2);
typedef void (*free_fn)(void *key);
/* Link node of a doubly linked list; embedded in every pair and used as
   the list head inside the hashtable itself. */
struct hashtable_list {
struct hashtable_list *prev;
struct hashtable_list *next;
};
/* One stored entry. */
struct hashtable_pair {
void *key;
void *value;
/* hash of the key, stored when the pair is created */
unsigned int hash;
/* links this pair into the table's list */
struct hashtable_list list;
};
/* A bucket is the run of list nodes from `first` to `last` inclusive.
   Both point at the table's list head when the bucket is empty. */
struct hashtable_bucket {
struct hashtable_list *first;
struct hashtable_list *last;
};
/* The hashtable object. */
typedef struct hashtable {
/* number of pairs currently stored */
unsigned int size;
/* array of buckets */
struct hashtable_bucket *buckets;
unsigned int num_buckets; /* index to primes[] */
/* head of the list that links all pairs */
struct hashtable_list list;
key_hash_fn hash_key;
key_cmp_fn cmp_keys; /* returns non-zero for equal keys */
/* optional destructors; either may be NULL */
free_fn free_key;
free_fn free_value;
} hashtable_t;
/**
* hashtable_create - Create a hashtable object
*
* @hash_key: The key hashing function
* @cmp_keys: The key compare function. Returns non-zero for equal and
* zero for unequal keys
* @free_key: If non-NULL, called for a key that is no longer referenced.
* @free_value: If non-NULL, called for a value that is no longer referenced.
*
* Returns a new hashtable object that should be freed with
* hashtable_destroy when it's no longer used, or NULL on failure (out
* of memory).
*/
hashtable_t *hashtable_create(key_hash_fn hash_key, key_cmp_fn cmp_keys,
free_fn free_key, free_fn free_value);
/**
* hashtable_destroy - Destroy a hashtable object
*
* @hashtable: The hashtable
*
* Destroys a hashtable created with hashtable_create().
*/
void hashtable_destroy(hashtable_t *hashtable);
/**
* hashtable_init - Initialize a hashtable object
*
* @hashtable: The (statically allocated) hashtable object
* @hash_key: The key hashing function
* @cmp_keys: The key compare function. Returns non-zero for equal and
* zero for unequal keys
* @free_key: If non-NULL, called for a key that is no longer referenced.
* @free_value: If non-NULL, called for a value that is no longer referenced.
*
* Initializes a statically allocated hashtable object. The object
* should be cleared with hashtable_close when it's no longer used.
*
* Returns 0 on success, -1 on error (out of memory).
*/
int hashtable_init(hashtable_t *hashtable,
key_hash_fn hash_key, key_cmp_fn cmp_keys,
free_fn free_key, free_fn free_value);
/**
* hashtable_close - Release all resources used by a hashtable object
*
* @hashtable: The hashtable
*
* Destroys a statically allocated hashtable object.
*/
void hashtable_close(hashtable_t *hashtable);
/**
* hashtable_set - Add/modify value in hashtable
*
* @hashtable: The hashtable object
* @key: The key
* @value: The value
*
* If a value with the given key already exists, its value is replaced
* with the new value.
*
* Key and value are "stolen" in the sense that hashtable frees them
* automatically when they are no longer used. The freeing is
* accomplished by calling free_key and free_value functions that were
* supplied to hashtable_create or hashtable_init. In case one or both of
* the free functions is NULL, the corresponding item is not "stolen".
*
* Returns 0 on success, -1 on failure (out of memory).
*/
int hashtable_set(hashtable_t *hashtable, void *key, void *value);
/**
* hashtable_get - Get a value associated with a key
*
* @hashtable: The hashtable object
* @key: The key
*
* Returns value if it is found, or NULL otherwise.
*/
void *hashtable_get(hashtable_t *hashtable, const void *key);
/**
* hashtable_del - Remove a value from the hashtable
*
* @hashtable: The hashtable object
* @key: The key
*
* Returns 0 on success, or -1 if the key was not found.
*/
int hashtable_del(hashtable_t *hashtable, const void *key);
/**
* hashtable_clear - Clear hashtable
*
* @hashtable: The hashtable object
*
* Removes all items from the hashtable.
*/
void hashtable_clear(hashtable_t *hashtable);
/**
* hashtable_iter - Iterate over hashtable
*
* @hashtable: The hashtable object
*
* Returns an opaque iterator to the first element in the hashtable.
* The iterator should be passed to hashtable_iter_* functions.
* The hashtable items are not iterated over in any particular order.
*
* There's no need to free the iterator in any way. The iterator is
* valid as long as the item that is referenced by the iterator is not
* deleted. Other values may be added or deleted. In particular,
* hashtable_iter_next() may be called on an iterator, and after that
* the key/value pair pointed by the old iterator may be deleted.
*/
void *hashtable_iter(hashtable_t *hashtable);
/**
* hashtable_iter_at - Return an iterator at a specific key
*
* @hashtable: The hashtable object
* @key: The key that the iterator should point to
*
* Like hashtable_iter() but returns an iterator pointing to a
* specific key.
*/
void *hashtable_iter_at(hashtable_t *hashtable, const void *key);
/**
* hashtable_iter_next - Advance an iterator
*
* @hashtable: The hashtable object
* @iter: The iterator
*
* Returns a new iterator pointing to the next element in the
* hashtable or NULL if the whole hashtable has been iterated over.
*/
void *hashtable_iter_next(hashtable_t *hashtable, void *iter);
/**
* hashtable_iter_key - Retrieve the key pointed by an iterator
*
* @iter: The iterator
*/
void *hashtable_iter_key(void *iter);
/**
* hashtable_iter_value - Retrieve the value pointed by an iterator
*
* @iter: The iterator
*/
void *hashtable_iter_value(void *iter);
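/**
* hashtable_iter_set - Set the value pointed by an iterator
*
* @hashtable: The hashtable object
* @iter: The iterator
* @value: The value to set
*
* The previous value is released with free_value, if one was supplied.
*/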
void hashtable_iter_set(hashtable_t *hashtable, void *iter, void *value);
#endif
07070100000013000081A4000003E800000064000000015EF4BCA1000015FB000000000000000000000000000000000000002800000000cpuminer-2.5.1/compat/jansson/jansson.h/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#ifndef JANSSON_H
#define JANSSON_H
#include <stdio.h>
#ifndef __cplusplus
#define JSON_INLINE inline
#else
#define JSON_INLINE inline
extern "C" {
#endif
/* types */
/* Kind of value a json_t holds. */
typedef enum {
    JSON_OBJECT,
    JSON_ARRAY,
    JSON_STRING,
    JSON_INTEGER,
    JSON_REAL,
    JSON_TRUE,
    JSON_FALSE,
    JSON_NULL
} json_type;

/* Common header of every JSON value: its kind and its reference count. */
typedef struct {
    json_type type;
    unsigned long refcount;
} json_t;

/* Type tests.  Each json_is_* macro yields zero for a NULL pointer.
   The argument is parenthesized so that any pointer expression can be
   passed; note that it is evaluated more than once. */
#define json_typeof(json)      ((json)->type)
#define json_is_object(json)   ((json) && json_typeof(json) == JSON_OBJECT)
#define json_is_array(json)    ((json) && json_typeof(json) == JSON_ARRAY)
#define json_is_string(json)   ((json) && json_typeof(json) == JSON_STRING)
#define json_is_integer(json)  ((json) && json_typeof(json) == JSON_INTEGER)
#define json_is_real(json)     ((json) && json_typeof(json) == JSON_REAL)
#define json_is_number(json)   (json_is_integer(json) || json_is_real(json))
#define json_is_true(json)     ((json) && json_typeof(json) == JSON_TRUE)
#define json_is_false(json)    ((json) && json_typeof(json) == JSON_FALSE)
#define json_is_boolean(json)  (json_is_true(json) || json_is_false(json))
#define json_is_null(json)     ((json) && json_typeof(json) == JSON_NULL)
/* construction, destruction, reference counting */
json_t *json_object(void);
json_t *json_array(void);
json_t *json_string(const char *value);
json_t *json_string_nocheck(const char *value);
json_t *json_integer(int value);
json_t *json_real(double value);
json_t *json_true(void);
json_t *json_false(void);
json_t *json_null(void);
/* Take a reference on `json` and return it.  NULL is passed through
   unchanged, and a value whose refcount equals (unsigned int)-1 is left
   untouched. */
static JSON_INLINE
json_t *json_incref(json_t *json)
{
    if(!json)
        return json;

    if(json->refcount != (unsigned int)-1)
        json->refcount++;

    return json;
}
/* do not call json_delete directly */
void json_delete(json_t *json);
/* Drop a reference on `json`; json_delete() is called when the count
   reaches zero.  NULL and values whose refcount equals (unsigned int)-1
   are ignored. */
static JSON_INLINE
void json_decref(json_t *json)
{
    if(!json || json->refcount == (unsigned int)-1)
        return;

    json->refcount--;
    if(json->refcount == 0)
        json_delete(json);
}
/* getters, setters, manipulation */
unsigned int json_object_size(const json_t *object);
json_t *json_object_get(const json_t *object, const char *key);
int json_object_set_new(json_t *object, const char *key, json_t *value);
int json_object_set_new_nocheck(json_t *object, const char *key, json_t *value);
int json_object_del(json_t *object, const char *key);
int json_object_clear(json_t *object);
int json_object_update(json_t *object, json_t *other);
void *json_object_iter(json_t *object);
void *json_object_iter_at(json_t *object, const char *key);
void *json_object_iter_next(json_t *object, void *iter);
const char *json_object_iter_key(void *iter);
json_t *json_object_iter_value(void *iter);
int json_object_iter_set_new(json_t *object, void *iter, json_t *value);
/* Like json_object_set_new(), but takes its own reference on `value`
   first, so the caller's reference stays with the caller. */
static JSON_INLINE
int json_object_set(json_t *object, const char *key, json_t *value)
{
    json_t *retained = json_incref(value);

    return json_object_set_new(object, key, retained);
}
/* Like json_object_set_new_nocheck(), but takes its own reference on
   `value` first, so the caller's reference stays with the caller. */
static JSON_INLINE
int json_object_set_nocheck(json_t *object, const char *key, json_t *value)
{
    json_t *retained = json_incref(value);

    return json_object_set_new_nocheck(object, key, retained);
}
static inline
int json_object_iter_set(json_t *object, void *iter, json_t *value)
{
return json_object_iter_set_new(object, iter, json_incref(value));
}
unsigned int json_array_size(const json_t *array);
json_t *json_array_get(const json_t *array, unsigned int index);
int json_array_set_new(json_t *array, unsigned int index, json_t *value);
int json_array_append_new(json_t *array, json_t *value);
int json_array_insert_new(json_t *array, unsigned int index, json_t *value);
int json_array_remove(json_t *array, unsigned int index);
int json_array_clear(json_t *array);
int json_array_extend(json_t *array, json_t *other);
/* Like json_array_set_new(), but takes its own reference on `value`
   first, so the caller's reference stays with the caller. */
static JSON_INLINE
int json_array_set(json_t *array, unsigned int index, json_t *value)
{
    json_t *retained = json_incref(value);

    return json_array_set_new(array, index, retained);
}
/* Like json_array_append_new(), but takes its own reference on `value`
   first, so the caller's reference stays with the caller. */
static JSON_INLINE
int json_array_append(json_t *array, json_t *value)
{
    json_t *retained = json_incref(value);

    return json_array_append_new(array, retained);
}
/* Like json_array_insert_new(), but takes its own reference on `value`
   first, so the caller's reference stays with the caller. */
static JSON_INLINE
int json_array_insert(json_t *array, unsigned int index, json_t *value)
{
    json_t *retained = json_incref(value);

    return json_array_insert_new(array, index, retained);
}
const char *json_string_value(const json_t *string);
int json_integer_value(const json_t *integer);
double json_real_value(const json_t *real);
double json_number_value(const json_t *json);
int json_string_set(json_t *string, const char *value);
int json_string_set_nocheck(json_t *string, const char *value);
int json_integer_set(json_t *integer, int value);
int json_real_set(json_t *real, double value);
/* equality */
int json_equal(json_t *value1, json_t *value2);
/* copying */
json_t *json_copy(json_t *value);
json_t *json_deep_copy(json_t *value);
/* loading, printing */
/* Size in bytes of the text buffer inside json_error_t. */
#define JSON_ERROR_TEXT_LENGTH 160
/* Error report that the json_load* functions receive through their
   `error` parameter. */
typedef struct {
/* presumably a NUL-terminated message -- confirm in load.c */
char text[JSON_ERROR_TEXT_LENGTH];
/* presumably the input line the error refers to -- confirm in load.c */
int line;
} json_error_t;
json_t *json_loads(const char *input, json_error_t *error);
json_t *json_loadf(FILE *input, json_error_t *error);
json_t *json_load_file(const char *path, json_error_t *error);
/* Encoder flags for json_dumps/json_dumpf/json_dump_file.
   The low 8 bits carry the indentation width; JSON_INDENT() masks a value
   down to that field.  Its argument is parenthesized so that expressions
   such as JSON_INDENT(flags | 4) are masked as a whole. */
#define JSON_INDENT(n)      ((n) & 0xFF)
#define JSON_COMPACT        0x100
#define JSON_ENSURE_ASCII   0x200
#define JSON_SORT_KEYS      0x400
#define JSON_PRESERVE_ORDER 0x800
char *json_dumps(const json_t *json, unsigned long flags);
int json_dumpf(const json_t *json, FILE *output, unsigned long flags);
int json_dump_file(const json_t *json, const char *path, unsigned long flags);
#ifdef __cplusplus
}
#endif
#endif
07070100000014000081A4000003E800000064000000015EF4BCA100000563000000000000000000000000000000000000003000000000cpuminer-2.5.1/compat/jansson/jansson_private.h/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#ifndef JANSSON_PRIVATE_H
#define JANSSON_PRIVATE_H
#include "jansson.h"
#include "hashtable.h"
/* Given ptr_, a pointer to the member member_ embedded in an object of
   type type_, compute a pointer to that enclosing object.  ptr_ is
   parenthesized so that any pointer expression (e.g. a conditional)
   can be passed safely. */
#define container_of(ptr_, type_, member_) \
    ((type_ *)((char *)(ptr_) - (size_t)&((type_ *)0)->member_))
/* Private representations of the JSON value types.  Each one embeds the
   public json_t as its "json" member so that the json_to_*() macros can
   recover the full structure from a json_t pointer via container_of(). */

/* JSON object: a hashtable plus a counter from which each newly set key
   takes its serial number. */
typedef struct {
json_t json;
hashtable_t hashtable;
unsigned long serial;
/* NOTE(review): initialized to 0 on creation; its users are outside this
   section (presumably cycle detection while dumping) -- confirm */
int visited;
} json_object_t;
/* JSON array: a separately allocated table of element pointers. */
typedef struct {
json_t json;
unsigned int size; /* number of slots allocated in table */
unsigned int entries; /* number of slots currently in use */
json_t **table;
/* NOTE(review): initialized to 0 on creation; see the note on
   json_object_t.visited */
int visited;
} json_array_t;
/* JSON string */
typedef struct {
json_t json;
char *value;
} json_string_t;
/* JSON real number */
typedef struct {
json_t json;
double value;
} json_real_t;
/* JSON integer */
typedef struct {
json_t json;
int value;
} json_integer_t;
#define json_to_object(json_) container_of(json_, json_object_t, json)
#define json_to_array(json_) container_of(json_, json_array_t, json)
#define json_to_string(json_) container_of(json_, json_string_t, json)
#define json_to_real(json_) container_of(json_, json_real_t, json)
#define json_to_integer(json_) container_of(json_, json_integer_t, json)
/* Key as stored in an object's hashtable: the serial number assigned
   when the key was set, followed by the NUL-terminated key text in a
   flexible array member. */
typedef struct {
unsigned long serial;
char key[];
} object_key_t;
const object_key_t *jsonp_object_iter_fullkey(void *iter);
#endif
07070100000015000081A4000003E800000064000000015EF4BCA100004FB9000000000000000000000000000000000000002500000000cpuminer-2.5.1/compat/jansson/load.c/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <jansson.h>
#include "jansson_private.h"
#include "strbuffer.h"
#include "utf.h"
#define TOKEN_INVALID -1
#define TOKEN_EOF 0
#define TOKEN_STRING 256
#define TOKEN_INTEGER 257
#define TOKEN_REAL 258
#define TOKEN_TRUE 259
#define TOKEN_FALSE 260
#define TOKEN_NULL 261
/* read one byte from stream, return EOF on end of file */
typedef int (*get_func)(void *data);
/* return non-zero if end of file has been reached */
typedef int (*eof_func)(void *data);
/* Input source seen through get/eof callbacks, plus a small buffer that
   holds the bytes of the character currently being delivered. */
typedef struct {
get_func get;
eof_func eof;
void *data; /* opaque argument passed to get and eof */
int stream_pos; /* count of bytes accepted so far; used in error messages */
char buffer[5]; /* up to 4 bytes of one UTF-8 sequence + terminating NUL */
int buffer_pos; /* index of the next byte of buffer to hand out */
} stream_t;
/* Lexer state: the input stream, the raw text of the current token and
   the token's type and decoded value. */
typedef struct {
stream_t stream;
strbuffer_t saved_text; /* raw input text of the token being scanned */
int token; /* one of the TOKEN_* constants or a punctuation character */
int line, column; /* NOTE(review): column is never written in this file */
union {
char *string; /* heap-allocated; only valid when token == TOKEN_STRING */
int integer;
double real;
} value;
} lex_t;
/*** error reporting ***/
/* Reset *error to the "no error" state: empty text and line -1.
   A NULL error pointer is accepted and ignored. */
static void error_init(json_error_t *error)
{
    if(error == NULL)
        return;

    error->line = -1;
    error->text[0] = '\0';
}
/* Record a formatted error message in *error.

   Nothing happens when error is NULL or already holds a message, so only
   the first reported error is kept.  With a lexer given, the line number
   is taken from it and the message is extended with the token text (when
   it is at most 20 bytes long) or with "near end of file" (when no token
   text is saved).  Without a lexer the line is set to -1. */
static void error_set(json_error_t *error, const lex_t *lex,
                      const char *msg, ...)
{
    va_list args;
    char formatted[JSON_ERROR_TEXT_LENGTH];
    const char *context;

    if(error == NULL || error->text[0] != '\0')
        return;

    va_start(args, msg);
    vsnprintf(formatted, JSON_ERROR_TEXT_LENGTH, msg, args);
    va_end(args);

    if(lex == NULL) {
        error->line = -1;
        snprintf(error->text, JSON_ERROR_TEXT_LENGTH, "%s", formatted);
        return;
    }

    context = strbuffer_value(&lex->saved_text);
    error->line = lex->line;

    if(context == NULL || context[0] == '\0') {
        snprintf(error->text, JSON_ERROR_TEXT_LENGTH,
                 "%s near end of file", formatted);
    }
    else if(lex->saved_text.length <= 20) {
        snprintf(error->text, JSON_ERROR_TEXT_LENGTH,
                 "%s near '%s'", formatted, context);
    }
    else {
        /* token text too long to quote */
        snprintf(error->text, JSON_ERROR_TEXT_LENGTH, "%s", formatted);
    }
}
/*** lexical analyzer ***/
/* Bind a stream to its callbacks and opaque data, and start with an
   empty character buffer at position 0. */
static void
stream_init(stream_t *stream, get_func get, eof_func eof, void *data)
{
    /* input callbacks */
    stream->data = data;
    stream->get = get;
    stream->eof = eof;

    /* nothing read, nothing buffered */
    stream->stream_pos = 0;
    stream->buffer_pos = 0;
    stream->buffer[0] = '\0';
}
/* Return the next byte of the stream.

   Input is fetched one character at a time into stream->buffer: a single
   byte, or a complete multi-byte UTF-8 sequence that is validated before
   any of its bytes are handed out.  Later calls drain the buffer byte by
   byte.  On an undecodable sequence an error is recorded (without lexer
   context) and EOF is returned. */
static char stream_get(stream_t *stream, json_error_t *error)
{
char c;
/* the buffer is exhausted once the current position holds the NUL */
if(!stream->buffer[stream->buffer_pos])
{
stream->buffer[0] = stream->get(stream->data);
stream->buffer_pos = 0;
c = stream->buffer[0];
/* a byte equal to (char)EOF is never treated as a UTF-8 lead byte */
if((unsigned char)c >= 0x80 && c != (char)EOF)
{
/* multi-byte UTF-8 sequence */
int i, count;
count = utf8_check_first(c);
if(!count)
goto out;
assert(count >= 2);
/* read the remaining bytes of the sequence */
for(i = 1; i < count; i++)
stream->buffer[i] = stream->get(stream->data);
if(!utf8_check_full(stream->buffer, count, NULL))
goto out;
stream->stream_pos += count;
stream->buffer[count] = '\0';
}
else {
stream->buffer[1] = '\0';
stream->stream_pos++;
}
}
return stream->buffer[stream->buffer_pos++];
out:
error_set(error, NULL, "unable to decode byte 0x%x at position %d",
(unsigned char)c, stream->stream_pos);
/* leave a one-byte EOF marker in the buffer, already consumed, so that
   stream_unget() on the returned EOF still works */
stream->buffer[0] = EOF;
stream->buffer[1] = '\0';
stream->buffer_pos = 1;
return EOF;
}
/* Push the most recently delivered byte back by stepping the buffer
   position one byte backwards.  c must be that byte (asserted). */
static void stream_unget(stream_t *stream, char c)
{
    int prev;

    assert(stream->buffer_pos > 0);

    prev = stream->buffer_pos - 1;
    stream->buffer_pos = prev;

    assert(stream->buffer[prev] == c);
}
/* Read the next byte from the lexer's stream without saving it. */
static int lex_get(lex_t *lex, json_error_t *error)
{
    char byte = stream_get(&lex->stream, error);

    return byte;
}
/* Ask the stream's eof callback whether the end of input was reached. */
static int lex_eof(lex_t *lex)
{
    stream_t *source = &lex->stream;

    return source->eof(source->data);
}
/* Append one byte to the saved text of the current token.  The result
   of the append is not checked. */
static void lex_save(lex_t *lex, char c)
{
    strbuffer_t *text = &lex->saved_text;

    strbuffer_append_byte(text, c);
}
/* Read the next byte from the stream, append it to the saved token text
   and return it. */
static int lex_get_save(lex_t *lex, json_error_t *error)
{
    char byte = stream_get(&lex->stream, error);

    strbuffer_append_byte(&lex->saved_text, byte);
    return byte;
}
/* Undo one lex_get_save(): push c back to the stream and drop the last
   byte of the saved token text, which must equal c (asserted). */
static void lex_unget_unsave(lex_t *lex, char c)
{
    char popped;

    stream_unget(&lex->stream, c);

    popped = strbuffer_pop(&lex->saved_text);
    assert(popped == c);
}
/* Move every byte still pending in the stream's character buffer into
   the saved token text. */
static void lex_save_cached(lex_t *lex)
{
    stream_t *source = &lex->stream;

    for(; source->buffer[source->buffer_pos] != '\0'; source->buffer_pos++)
        lex_save(lex, source->buffer[source->buffer_pos]);
}
/* Decode the four hexadecimal digits of a \uXXXX escape.

   str must point to the 'u' and be followed by at least 4 valid hex
   digits; both conditions are asserted.  Returns the value of the four
   digits (0x0000..0xFFFF). */
static int32_t decode_unicode_escape(const char *str)
{
    int i;
    int32_t value = 0;

    assert(str[0] == 'u');

    for(i = 1; i <= 4; i++) {
        /* work on an unsigned char: handing a negative plain char to a
           <ctype.h> function is undefined behavior */
        unsigned char c = (unsigned char)str[i];

        value <<= 4;
        if(isdigit(c))
            value += c - '0';
        else if(c >= 'a' && c <= 'f')
            value += c - 'a' + 10;
        else if(c >= 'A' && c <= 'F')
            value += c - 'A' + 10;
        else
            assert(0); /* not a hex digit: caller broke the contract */
    }

    return value;
}
/* Scan a string token.  The opening '"' has already been read and saved
   by the caller.

   The work is done in two passes.  The first pass reads input up to the
   closing '"', saving the raw text and rejecting end of input, control
   characters and malformed escapes.  The second pass walks the saved
   text and writes the decoded string into a freshly malloc()ed buffer.

   On success lex->token is TOKEN_STRING and lex->value.string holds the
   decoded, NUL-terminated string.  On failure lex->token stays
   TOKEN_INVALID and the buffer, if any, has been freed. */
static void lex_scan_string(lex_t *lex, json_error_t *error)
{
char c;
const char *p;
char *t;
int i;
lex->value.string = NULL;
lex->token = TOKEN_INVALID;
/* first pass: validate and save the raw text */
c = lex_get_save(lex, error);
while(c != '"') {
if(c == (char)EOF) {
lex_unget_unsave(lex, c);
/* report only a genuine end of input here; a decoding failure has
   already been reported by stream_get() */
if(lex_eof(lex))
error_set(error, lex, "premature end of input");
goto out;
}
else if((unsigned char)c <= 0x1F) {
/* control character */
lex_unget_unsave(lex, c);
if(c == '\n')
error_set(error, lex, "unexpected newline", c);
else
error_set(error, lex, "control character 0x%x", c);
goto out;
}
else if(c == '\\') {
c = lex_get_save(lex, error);
if(c == 'u') {
/* exactly four hex digits must follow */
c = lex_get_save(lex, error);
for(i = 0; i < 4; i++) {
if(!isxdigit(c)) {
lex_unget_unsave(lex, c);
error_set(error, lex, "invalid escape");
goto out;
}
c = lex_get_save(lex, error);
}
}
else if(c == '"' || c == '\\' || c == '/' || c == 'b' ||
c == 'f' || c == 'n' || c == 'r' || c == 't')
c = lex_get_save(lex, error);
else {
lex_unget_unsave(lex, c);
error_set(error, lex, "invalid escape");
goto out;
}
}
else
c = lex_get_save(lex, error);
}
/* the actual value is at most of the same length as the source
string, because:
- shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
- a single \uXXXX escape (length 6) is converted to at most 3 bytes
- two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
are converted to 4 bytes
*/
lex->value.string = malloc(lex->saved_text.length + 1);
if(!lex->value.string) {
/* this is not very nice, since TOKEN_INVALID is returned */
goto out;
}
/* second pass: decode the saved text into the new buffer */
/* the target */
t = lex->value.string;
/* + 1 to skip the " */
p = strbuffer_value(&lex->saved_text) + 1;
while(*p != '"') {
if(*p == '\\') {
p++;
if(*p == 'u') {
char buffer[4];
int length;
int32_t value;
value = decode_unicode_escape(p);
/* skip the 'u' and the four hex digits */
p += 5;
if(0xD800 <= value && value <= 0xDBFF) {
/* surrogate pair */
if(*p == '\\' && *(p + 1) == 'u') {
int32_t value2 = decode_unicode_escape(++p);
p += 5;
if(0xDC00 <= value2 && value2 <= 0xDFFF) {
/* valid second surrogate */
value =
((value - 0xD800) << 10) +
(value2 - 0xDC00) +
0x10000;
}
else {
/* invalid second surrogate */
error_set(error, lex,
"invalid Unicode '\\u%04X\\u%04X'",
value, value2);
goto out;
}
}
else {
/* no second surrogate */
error_set(error, lex, "invalid Unicode '\\u%04X'",
value);
goto out;
}
}
else if(0xDC00 <= value && value <= 0xDFFF) {
/* a second surrogate without a preceding first one */
error_set(error, lex, "invalid Unicode '\\u%04X'", value);
goto out;
}
else if(value == 0)
{
error_set(error, lex, "\\u0000 is not allowed");
goto out;
}
if(utf8_encode(value, buffer, &length))
assert(0);
memcpy(t, buffer, length);
t += length;
}
else {
/* single-character escape; validated by the first pass */
switch(*p) {
case '"': case '\\': case '/':
*t = *p; break;
case 'b': *t = '\b'; break;
case 'f': *t = '\f'; break;
case 'n': *t = '\n'; break;
case 'r': *t = '\r'; break;
case 't': *t = '\t'; break;
default: assert(0);
}
t++;
p++;
}
}
else
*(t++) = *(p++);
}
*t = '\0';
lex->token = TOKEN_STRING;
return;
out:
/* value.string is NULL unless the second pass failed */
free(lex->value.string);
}
/* Scan an integer or real number token.

   c is the first character of the number (a digit or '-'); it has
   already been saved by the caller.  On success, returns 0 with
   lex->token set to TOKEN_INTEGER or TOKEN_REAL and the value stored in
   lex->value.  On failure, returns -1 and lex->token is TOKEN_INVALID;
   an error message is set for out-of-range values. */
static int lex_scan_number(lex_t *lex, char c, json_error_t *error)
{
    const char *saved_text;
    char *end;
    double value;

    lex->token = TOKEN_INVALID;

    if(c == '-')
        c = lex_get_save(lex, error);

    /* integer part; the casts keep the <ctype.h> calls well defined for
       bytes >= 0x80 */
    if(c == '0') {
        c = lex_get_save(lex, error);
        if(isdigit((unsigned char)c)) {
            /* leading zeros are not allowed */
            lex_unget_unsave(lex, c);
            goto out;
        }
    }
    else if(isdigit((unsigned char)c)) {
        c = lex_get_save(lex, error);
        while(isdigit((unsigned char)c))
            c = lex_get_save(lex, error);
    }
    else {
        lex_unget_unsave(lex, c);
        goto out;
    }

    if(c != '.' && c != 'E' && c != 'e') {
        long intval;

        lex_unget_unsave(lex, c);
        saved_text = strbuffer_value(&lex->saved_text);

        /* strtol() only sets errno on failure, so clear it first;
           otherwise a stale ERANGE could be mistaken for an overflow */
        errno = 0;
        intval = strtol(saved_text, &end, 10);
        assert(end == saved_text + lex->saved_text.length);

        if((intval == LONG_MAX && errno == ERANGE) || intval > INT_MAX) {
            error_set(error, lex, "too big integer");
            goto out;
        }
        else if((intval == LONG_MIN && errno == ERANGE) || intval < INT_MIN) {
            error_set(error, lex, "too big negative integer");
            goto out;
        }

        lex->token = TOKEN_INTEGER;
        lex->value.integer = (int)intval;
        return 0;
    }

    /* fraction */
    if(c == '.') {
        c = lex_get(lex, error);
        if(!isdigit((unsigned char)c))
            goto out;
        lex_save(lex, c);

        c = lex_get_save(lex, error);
        while(isdigit((unsigned char)c))
            c = lex_get_save(lex, error);
    }

    /* exponent */
    if(c == 'E' || c == 'e') {
        c = lex_get_save(lex, error);
        if(c == '+' || c == '-')
            c = lex_get_save(lex, error);

        if(!isdigit((unsigned char)c)) {
            lex_unget_unsave(lex, c);
            goto out;
        }

        c = lex_get_save(lex, error);
        while(isdigit((unsigned char)c))
            c = lex_get_save(lex, error);
    }

    lex_unget_unsave(lex, c);
    saved_text = strbuffer_value(&lex->saved_text);

    /* same as above: a stale ERANGE would reject every valid real */
    errno = 0;
    value = strtod(saved_text, &end);
    assert(end == saved_text + lex->saved_text.length);

    if(errno == ERANGE && value != 0) {
        error_set(error, lex, "real number overflow");
        goto out;
    }

    lex->token = TOKEN_REAL;
    lex->value.real = value;
    return 0;

out:
    return -1;
}
/* Read the next token from the input.

   Clears the saved token text, frees the string held by a previous
   TOKEN_STRING, skips whitespace (counting newlines in lex->line) and
   classifies what follows.  The token type is stored in lex->token and
   also returned. */
static int lex_scan(lex_t *lex, json_error_t *error)
{
    char ch;

    strbuffer_clear(&lex->saved_text);

    if(lex->token == TOKEN_STRING) {
        free(lex->value.string);
        lex->value.string = NULL;
    }

    /* skip whitespace */
    for(ch = lex_get(lex, error);
        ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
        ch = lex_get(lex, error))
    {
        if(ch == '\n')
            lex->line++;
    }

    if(ch == (char)EOF) {
        /* either a real end of input or a read/decoding failure */
        lex->token = lex_eof(lex) ? TOKEN_EOF : TOKEN_INVALID;
        return lex->token;
    }

    lex_save(lex, ch);

    switch(ch) {
        case '{': case '}': case '[': case ']': case ':': case ',':
            /* punctuation is its own token */
            lex->token = ch;
            return lex->token;

        case '"':
            lex_scan_string(lex, error);
            return lex->token;

        default:
            break;
    }

    if(isdigit(ch) || ch == '-') {
        /* lex_scan_number() sets lex->token on success and on failure */
        lex_scan_number(lex, ch, error);
        return lex->token;
    }

    if(isupper(ch) || islower(ch)) {
        /* eat up the whole identifier for clearer error messages */
        const char *word;

        do {
            ch = lex_get_save(lex, error);
        } while(isupper(ch) || islower(ch));
        lex_unget_unsave(lex, ch);

        word = strbuffer_value(&lex->saved_text);

        if(strcmp(word, "true") == 0)
            lex->token = TOKEN_TRUE;
        else if(strcmp(word, "false") == 0)
            lex->token = TOKEN_FALSE;
        else if(strcmp(word, "null") == 0)
            lex->token = TOKEN_NULL;
        else
            lex->token = TOKEN_INVALID;
    }
    else {
        /* save the rest of the input UTF-8 sequence to get an error
           message of valid UTF-8 */
        lex_save_cached(lex);
        lex->token = TOKEN_INVALID;
    }

    return lex->token;
}
/* Take the decoded string out of the lexer.  Returns NULL unless the
   current token is TOKEN_STRING; otherwise returns the string and clears
   the lexer's pointer to it. */
static char *lex_steal_string(lex_t *lex)
{
    char *stolen;

    if(lex->token != TOKEN_STRING)
        return NULL;

    stolen = lex->value.string;
    lex->value.string = NULL;
    return stolen;
}
/* Prepare a lexer that reads through the given callbacks.  Returns 0 on
   success, or -1 when the saved-text buffer cannot be initialized. */
static int lex_init(lex_t *lex, get_func get, eof_func eof, void *data)
{
    stream_init(&lex->stream, get, eof, data);

    if(strbuffer_init(&lex->saved_text) != 0)
        return -1;

    lex->line = 1;
    lex->token = TOKEN_INVALID;
    return 0;
}
/* Release the lexer's resources: the string of a pending TOKEN_STRING
   and the saved-text buffer. */
static void lex_close(lex_t *lex)
{
    int holds_string = (lex->token == TOKEN_STRING);

    if(holds_string)
        free(lex->value.string);

    strbuffer_close(&lex->saved_text);
}
/*** parser ***/
static json_t *parse_value(lex_t *lex, json_error_t *error);
/* Parse a JSON object.  The current token is the opening '{'.

   Returns the new object, or NULL on failure.  On every failure after
   the object has been created it is released again, so nothing is
   leaked to the caller. */
static json_t *parse_object(lex_t *lex, json_error_t *error)
{
    json_t *object = json_object();
    if(!object)
        return NULL;

    lex_scan(lex, error);
    if(lex->token == '}')
        return object;

    while(1) {
        char *key;
        json_t *value;

        if(lex->token != TOKEN_STRING) {
            error_set(error, lex, "string or '}' expected");
            goto error;
        }

        key = lex_steal_string(lex);
        if(!key) {
            /* go through the cleanup path so that the partially built
               object is released */
            goto error;
        }

        lex_scan(lex, error);
        if(lex->token != ':') {
            free(key);
            error_set(error, lex, "':' expected");
            goto error;
        }

        lex_scan(lex, error);
        value = parse_value(lex, error);
        if(!value) {
            free(key);
            goto error;
        }

        if(json_object_set_nocheck(object, key, value)) {
            free(key);
            json_decref(value);
            goto error;
        }

        /* drop the local reference to value and the stolen key string */
        json_decref(value);
        free(key);

        lex_scan(lex, error);
        if(lex->token != ',')
            break;

        lex_scan(lex, error);
    }

    if(lex->token != '}') {
        error_set(error, lex, "'}' expected");
        goto error;
    }

    return object;

error:
    json_decref(object);
    return NULL;
}
/* Parse a JSON array.  The current token is the opening '['.
   Returns the new array, or NULL on failure (the partially built array
   is released). */
static json_t *parse_array(lex_t *lex, json_error_t *error)
{
    json_t *array = json_array();

    if(array == NULL)
        return NULL;

    lex_scan(lex, error);
    if(lex->token == ']')
        return array;

    while(lex->token != TOKEN_EOF) {
        int rc;
        json_t *item = parse_value(lex, error);

        if(item == NULL)
            goto fail;

        /* the local reference is dropped whether or not the append
           succeeded */
        rc = json_array_append(array, item);
        json_decref(item);
        if(rc != 0)
            goto fail;

        lex_scan(lex, error);
        if(lex->token != ',')
            break;

        lex_scan(lex, error);
    }

    if(lex->token == ']')
        return array;

    error_set(error, lex, "']' expected");

fail:
    json_decref(array);
    return NULL;
}
/* Build a JSON value from the current token, recursing into
   parse_object()/parse_array() for containers.  Returns NULL on failure;
   for an invalid or unexpected token an error message is set. */
static json_t *parse_value(lex_t *lex, json_error_t *error)
{
    switch(lex->token) {
        case TOKEN_STRING:
            return json_string_nocheck(lex->value.string);

        case TOKEN_INTEGER:
            return json_integer(lex->value.integer);

        case TOKEN_REAL:
            return json_real(lex->value.real);

        case TOKEN_TRUE:
            return json_true();

        case TOKEN_FALSE:
            return json_false();

        case TOKEN_NULL:
            return json_null();

        case '{':
            return parse_object(lex, error);

        case '[':
            return parse_array(lex, error);

        case TOKEN_INVALID:
            error_set(error, lex, "invalid token");
            return NULL;

        default:
            error_set(error, lex, "unexpected token");
            return NULL;
    }
}
/* Parse a complete JSON text, which must start with '[' or '{'.
   Resets *error first.  Returns the parsed value or NULL on failure. */
static json_t *parse_json(lex_t *lex, json_error_t *error)
{
    error_init(error);

    lex_scan(lex, error);

    switch(lex->token) {
        case '[':
        case '{':
            return parse_value(lex, error);

        default:
            error_set(error, lex, "'[' or '{' expected");
            return NULL;
    }
}
/* Read cursor over a NUL-terminated string, used as the stream data of
   json_loads(). */
typedef struct
{
    const char *data; /* the string being read */
    int pos;          /* index of the next byte to deliver */
} string_data_t;

/* get_func for strings: return the next byte as an unsigned char value
   converted to int (like fgetc() does), or EOF at the terminating NUL.
   Returning the plain char would yield negative values for bytes >= 0x80
   and make the byte 0xFF indistinguishable from EOF. */
static int string_get(void *data)
{
    string_data_t *stream = (string_data_t *)data;
    unsigned char c = (unsigned char)stream->data[stream->pos];

    if(c == '\0')
        return EOF;

    stream->pos++;
    return c;
}

/* eof_func for strings: non-zero once the cursor is at the terminating
   NUL. */
static int string_eof(void *data)
{
    string_data_t *stream = (string_data_t *)data;
    return (stream->data[stream->pos] == '\0');
}
/* Parse the NUL-terminated JSON text in string.

   Returns the decoded value, or NULL on failure.  Anything other than
   end of input after the top-level value is an error. */
json_t *json_loads(const char *string, json_error_t *error)
{
    lex_t lex;
    json_t *root;
    string_data_t source;

    source.data = string;
    source.pos = 0;

    if(lex_init(&lex, string_get, string_eof, (void *)&source))
        return NULL;

    root = parse_json(&lex, error);
    if(root != NULL) {
        /* nothing but end of input may follow the value */
        lex_scan(&lex, error);
        if(lex.token != TOKEN_EOF) {
            error_set(error, &lex, "end of file expected");
            json_decref(root);
            root = NULL;
        }
    }

    lex_close(&lex);
    return root;
}
/* get_func adapter for FILE streams.  Calling fgetc() through a pointer
   cast to get_func would call it through an incompatible function
   pointer type, which is undefined behavior. */
static int file_get(void *data)
{
    return fgetc((FILE *)data);
}

/* eof_func adapter for FILE streams; see file_get(). */
static int file_eof(void *data)
{
    return feof((FILE *)data);
}

/* Parse JSON text read from the open stream input.

   Returns the decoded value, or NULL on failure.  Anything other than
   end of input after the top-level value is an error.  The stream is
   not closed. */
json_t *json_loadf(FILE *input, json_error_t *error)
{
    lex_t lex;
    json_t *result;

    if(lex_init(&lex, file_get, file_eof, input))
        return NULL;

    result = parse_json(&lex, error);
    if(!result)
        goto out;

    lex_scan(&lex, error);
    if(lex.token != TOKEN_EOF) {
        error_set(error, &lex, "end of file expected");
        json_decref(result);
        result = NULL;
    }

out:
    lex_close(&lex);
    return result;
}
/* Open the file at path for reading and parse its contents as JSON.
   Returns the decoded value, or NULL when the file cannot be opened
   (an error message with the strerror() text is set) or parsing fails. */
json_t *json_load_file(const char *path, json_error_t *error)
{
    FILE *fp;
    json_t *root;

    error_init(error);

    fp = fopen(path, "r");
    if(fp == NULL) {
        error_set(error, NULL, "unable to open %s: %s",
                  path, strerror(errno));
        return NULL;
    }

    root = json_loadf(fp, error);
    fclose(fp);

    return root;
}
07070100000016000081A4000003E800000064000000015EF4BCA100000852000000000000000000000000000000000000002A00000000cpuminer-2.5.1/compat/jansson/strbuffer.c/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include "strbuffer.h"
#include "util.h"
#define STRBUFFER_MIN_SIZE 16
#define STRBUFFER_FACTOR 2
/* Initialize strbuff as an empty string with STRBUFFER_MIN_SIZE bytes
   allocated.  Returns 0 on success, -1 if the allocation fails. */
int strbuffer_init(strbuffer_t *strbuff)
{
    strbuff->length = 0;
    strbuff->size = STRBUFFER_MIN_SIZE;

    strbuff->value = malloc(strbuff->size);
    if(strbuff->value == NULL)
        return -1;

    /* initialize to empty */
    strbuff->value[0] = '\0';
    return 0;
}
/* Free the buffer's storage and reset all fields to zero/NULL. */
void strbuffer_close(strbuffer_t *strbuff)
{
    free(strbuff->value);

    strbuff->value = NULL;
    strbuff->length = 0;
    strbuff->size = 0;
}
/* Make the buffer hold the empty string; the allocation is kept. */
void strbuffer_clear(strbuffer_t *strbuff)
{
    strbuff->value[0] = '\0';
    strbuff->length = 0;
}
/* Return a pointer to the buffer's current contents. */
const char *strbuffer_value(const strbuffer_t *strbuff)
{
    const char *contents = strbuff->value;

    return contents;
}
/* Hand the current contents over to the caller and re-initialize the
   buffer.  The result of the re-initialization is not checked. */
char *strbuffer_steal_value(strbuffer_t *strbuff)
{
    char *stolen;

    stolen = strbuff->value;
    strbuffer_init(strbuff);

    return stolen;
}
/* Append the NUL-terminated string to the buffer.  Returns the result
   of strbuffer_append_bytes(). */
int strbuffer_append(strbuffer_t *strbuff, const char *string)
{
    int count = strlen(string);

    return strbuffer_append_bytes(strbuff, string, count);
}
/* Append a single byte to the buffer.  Returns the result of
   strbuffer_append_bytes(). */
int strbuffer_append_byte(strbuffer_t *strbuff, char byte)
{
    const char *one = &byte;

    return strbuffer_append_bytes(strbuff, one, 1);
}
/* Append size bytes from data to the buffer, growing the allocation when
   needed, and keep the contents NUL-terminated.

   Returns 0 on success.  Returns -1 if the buffer cannot be grown; in
   that case the buffer is left unchanged and remains valid. */
int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, int size)
{
    if(strbuff->length + size >= strbuff->size)
    {
        int new_size = max(strbuff->size * STRBUFFER_FACTOR,
                           strbuff->length + size + 1);
        /* realloc() into a temporary: on failure the old block is still
           allocated and must stay reachable through strbuff->value */
        char *new_value = realloc(strbuff->value, new_size);
        if(!new_value)
            return -1;

        strbuff->value = new_value;
        strbuff->size = new_size;
    }

    memcpy(strbuff->value + strbuff->length, data, size);
    strbuff->length += size;
    strbuff->value[strbuff->length] = '\0';

    return 0;
}
/* Remove and return the last byte of the buffer, or return '\0' when
   the buffer is empty. */
char strbuffer_pop(strbuffer_t *strbuff)
{
    char last;

    if(strbuff->length <= 0)
        return '\0';

    strbuff->length--;
    last = strbuff->value[strbuff->length];
    strbuff->value[strbuff->length] = '\0';

    return last;
}
07070100000017000081A4000003E800000064000000015EF4BCA100000362000000000000000000000000000000000000002A00000000cpuminer-2.5.1/compat/jansson/strbuffer.h/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#ifndef STRBUFFER_H
#define STRBUFFER_H
/* Growable, always NUL-terminated byte string. */
typedef struct {
char *value; /* heap-allocated contents */
int length; /* bytes used */
int size; /* bytes allocated */
} strbuffer_t;
int strbuffer_init(strbuffer_t *strbuff);
void strbuffer_close(strbuffer_t *strbuff);
void strbuffer_clear(strbuffer_t *strbuff);
const char *strbuffer_value(const strbuffer_t *strbuff);
char *strbuffer_steal_value(strbuffer_t *strbuff);
int strbuffer_append(strbuffer_t *strbuff, const char *string);
int strbuffer_append_byte(strbuffer_t *strbuff, char byte);
int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, int size);
char strbuffer_pop(strbuffer_t *strbuff);
#endif
07070100000018000081A4000003E800000064000000015EF4BCA100000FB6000000000000000000000000000000000000002400000000cpuminer-2.5.1/compat/jansson/utf.c/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#include <string.h>
#include "utf.h"
/* Encode codepoint as UTF-8.

   Writes 1 to 4 bytes to buffer (no terminating NUL) and stores the
   number of bytes in *size.  Returns 0 on success, or -1 when codepoint
   is negative or greater than 0x10FFFF; nothing is written then. */
int utf8_encode(int32_t codepoint, char *buffer, int *size)
{
    if(codepoint < 0 || codepoint > 0x10FFFF)
        return -1;

    if(codepoint < 0x80) {
        /* 1 byte: 0xxxxxxx */
        buffer[0] = (char)codepoint;
        *size = 1;
        return 0;
    }

    if(codepoint < 0x800) {
        /* 2 bytes: 110xxxxx 10xxxxxx */
        buffer[0] = 0xC0 | (codepoint >> 6);
        buffer[1] = 0x80 | (codepoint & 0x3F);
        *size = 2;
        return 0;
    }

    if(codepoint < 0x10000) {
        /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        buffer[0] = 0xE0 | (codepoint >> 12);
        buffer[1] = 0x80 | ((codepoint >> 6) & 0x3F);
        buffer[2] = 0x80 | (codepoint & 0x3F);
        *size = 3;
        return 0;
    }

    /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    buffer[0] = 0xF0 | (codepoint >> 18);
    buffer[1] = 0x80 | ((codepoint >> 12) & 0x3F);
    buffer[2] = 0x80 | ((codepoint >> 6) & 0x3F);
    buffer[3] = 0x80 | (codepoint & 0x3F);
    *size = 4;
    return 0;
}
/* Classify the first byte of a UTF-8 sequence.

   Returns the length in bytes (1 to 4) of the sequence that byte starts,
   or 0 if byte cannot start a valid sequence. */
int utf8_check_first(char byte)
{
    unsigned char u = (unsigned char)byte;

    if(u < 0x80)
        return 1; /* ASCII */

    if(u < 0xC2) {
        /* 0x80..0xBF: continuation byte; 0xC0, 0xC1: overlong encoding
           of an ASCII byte */
        return 0;
    }

    if(u < 0xE0)
        return 2; /* 0xC2..0xDF */

    if(u < 0xF0)
        return 3; /* 0xE0..0xEF */

    if(u < 0xF5)
        return 4; /* 0xF0..0xF4 */

    /* 0xF5..0xFF: restricted (start of 4-, 5- or 6-byte sequence) or
       invalid UTF-8 */
    return 0;
}
/* Validate a multi-byte UTF-8 sequence of size bytes (2, 3 or 4)
   starting at buffer.

   Returns 1 if the sequence is valid and, when codepoint is not NULL,
   stores the decoded code point there.  Returns 0 for any other size, a
   bad continuation byte, a value above 0x10FFFF, a UTF-16 surrogate
   half or an overlong encoding. */
int utf8_check_full(const char *buffer, int size, int32_t *codepoint)
{
    /* smallest code point that needs a sequence of the given size */
    static const int32_t smallest[] = { 0, 0, 0x80, 0x800, 0x10000 };
    unsigned char lead = (unsigned char)buffer[0];
    int32_t value;
    int i;

    switch(size) {
        case 2: value = lead & 0x1F; break;
        case 3: value = lead & 0xF; break;
        case 4: value = lead & 0x7; break;
        default: return 0;
    }

    for(i = 1; i < size; i++) {
        unsigned char cont = (unsigned char)buffer[i];

        if((cont & 0xC0) != 0x80) {
            /* not a continuation byte */
            return 0;
        }
        value = (value << 6) + (cont & 0x3F);
    }

    if(value > 0x10FFFF)
        return 0; /* not in Unicode range */

    if(value >= 0xD800 && value <= 0xDFFF)
        return 0; /* invalid code point (UTF-16 surrogate halves) */

    if(value < smallest[size])
        return 0; /* overlong encoding */

    if(codepoint != NULL)
        *codepoint = value;

    return 1;
}
/* Decode the UTF-8 character at the start of buffer.

   Returns a pointer just past the character and, when codepoint is not
   NULL, stores the decoded value there.  At the terminating NUL, buffer
   itself is returned and *codepoint is not written.  Returns NULL if
   the bytes are not valid UTF-8. */
const char *utf8_iterate(const char *buffer, int32_t *codepoint)
{
    int32_t decoded;
    int width;

    if(*buffer == '\0')
        return buffer;

    width = utf8_check_first(buffer[0]);
    if(width <= 0)
        return NULL;

    if(width == 1)
        decoded = (unsigned char)buffer[0];
    else if(!utf8_check_full(buffer, width, &decoded))
        return NULL;

    if(codepoint != NULL)
        *codepoint = decoded;

    return buffer + width;
}
/* Check that the first length bytes of string are valid UTF-8.  Passing
   -1 as length means "use strlen(string)".  Returns 1 if valid, 0 if
   not. */
int utf8_check_string(const char *string, int length)
{
    int pos = 0;

    if(length == -1)
        length = strlen(string);

    while(pos < length) {
        int count = utf8_check_first(string[pos]);

        if(count == 0)
            return 0;

        if(count > 1) {
            /* the whole sequence must lie inside the checked range */
            if(pos + count > length)
                return 0;

            if(!utf8_check_full(&string[pos], count, NULL))
                return 0;
        }

        pos += count;
    }

    return 1;
}
07070100000019000081A4000003E800000064000000015EF4BCA10000030A000000000000000000000000000000000000002400000000cpuminer-2.5.1/compat/jansson/utf.h/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#ifndef UTF_H
#define UTF_H
#include <config.h>
#ifdef HAVE_INTTYPES_H
/* inttypes.h includes stdint.h in a standard environment, so there's
no need to include stdint.h separately. If inttypes.h doesn't define
int32_t, it's defined in config.h. */
#include <inttypes.h>
#endif
/* Encode codepoint as UTF-8 into buffer and store the byte count in
   *size; returns 0 on success, -1 for an invalid code point.  The
   parameter type matches the definition in utf.c (int32_t). */
int utf8_encode(int32_t codepoint, char *buffer, int *size);
int utf8_check_first(char byte);
int utf8_check_full(const char *buffer, int size, int32_t *codepoint);
const char *utf8_iterate(const char *buffer, int32_t *codepoint);
int utf8_check_string(const char *string, int length);
#endif
0707010000001A000081A4000003E800000064000000015EF4BCA100000120000000000000000000000000000000000000002500000000cpuminer-2.5.1/compat/jansson/util.h/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#ifndef UTIL_H
#define UTIL_H
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
0707010000001B000081A4000003E800000064000000015EF4BCA100004C51000000000000000000000000000000000000002600000000cpuminer-2.5.1/compat/jansson/value.c/*
* Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
*
* Jansson is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#define _GNU_SOURCE
#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <jansson.h>
#include "hashtable.h"
#include "jansson_private.h"
#include "utf.h"
#include "util.h"
/* Set up the common header of a newly created value: one reference and
   the given type. */
static inline void json_init(json_t *json, json_type type)
{
    json->refcount = 1;
    json->type = type;
}
/*** object ***/
/* This macro just returns a pointer that's a few bytes backwards from
string. This makes it possible to pass a pointer to object_key_t
when only the string inside it is used, without actually creating
an object_key_t instance. */
#define string_to_key(string) container_of(string, object_key_t, key)
static unsigned int hash_key(const void *ptr)
{
const char *str = ((const object_key_t *)ptr)->key;
unsigned int hash = 5381;
unsigned int c;
while((c = (unsigned int)*str))
{
hash = ((hash << 5) + hash) + c;
str++;
}
return hash;
}
static int key_equal(const void *ptr1, const void *ptr2)
{
return strcmp(((const object_key_t *)ptr1)->key,
((const object_key_t *)ptr2)->key) == 0;
}
static void value_decref(void *value)
{
json_decref((json_t *)value);
}
/* Create a new, empty JSON object.  Returns NULL if memory allocation
   or hashtable initialization fails. */
json_t *json_object(void)
{
    json_object_t *obj = malloc(sizeof(json_object_t));

    if(obj == NULL)
        return NULL;

    json_init(&obj->json, JSON_OBJECT);

    /* keys are released with free(), values with value_decref() */
    if(hashtable_init(&obj->hashtable, hash_key, key_equal,
                      free, value_decref) != 0)
    {
        free(obj);
        return NULL;
    }

    obj->visited = 0;
    obj->serial = 0;

    return &obj->json;
}
/* Destroy an object: close its hashtable, then free the object itself. */
static void json_delete_object(json_object_t *object)
{
    hashtable_t *table = &object->hashtable;

    hashtable_close(table);
    free(object);
}
/* Return the number of entries in the object, or 0 if json is not an
   object.  The function returns an unsigned count, so 0 is used for the
   non-object case, matching json_array_size(); returning -1 would
   surface as UINT_MAX. */
unsigned int json_object_size(const json_t *json)
{
    json_object_t *object;

    if(!json_is_object(json))
        return 0;

    object = json_to_object(json);
    return object->hashtable.size;
}
/* Look up key in the object.  Returns the stored value, or NULL if key
   is NULL, json is not an object or the lookup finds nothing. */
json_t *json_object_get(const json_t *json, const char *key)
{
    json_object_t *object;

    /* string_to_key() does pointer arithmetic on key and the hash
       function reads through it, so a NULL key must be rejected here */
    if(!key || !json_is_object(json))
        return NULL;

    object = json_to_object(json);
    return hashtable_get(&object->hashtable, string_to_key(key));
}
/* Store value under key in the object, taking over the caller's
   reference to value.  The key is not checked for valid UTF-8.

   Returns 0 on success and -1 on failure.  Whenever value is not NULL
   the stolen reference is released on failure, so the caller never has
   to clean up. */
int json_object_set_new_nocheck(json_t *json, const char *key, json_t *value)
{
    json_object_t *object;
    object_key_t *k;

    if(!value)
        return -1;

    if(!key || !json_is_object(json) || json == value)
    {
        json_decref(value);
        return -1;
    }
    object = json_to_object(json);

    /* the key is copied into an object_key_t with a fresh serial number */
    k = malloc(sizeof(object_key_t) + strlen(key) + 1);
    if(!k)
    {
        json_decref(value);
        return -1;
    }

    k->serial = object->serial++;
    strcpy(k->key, key);

    if(hashtable_set(&object->hashtable, k, value))
    {
        json_decref(value);
        return -1;
    }

    return 0;
}
/* Like json_object_set_new_nocheck(), but first requires key to be
   non-NULL and valid UTF-8; otherwise the reference to value is dropped
   and -1 is returned. */
int json_object_set_new(json_t *json, const char *key, json_t *value)
{
    if(key != NULL && utf8_check_string(key, -1))
        return json_object_set_new_nocheck(json, key, value);

    json_decref(value);
    return -1;
}
/* Delete key from the object.  Returns -1 if key is NULL or json is not
   an object; otherwise returns the result of hashtable_del(). */
int json_object_del(json_t *json, const char *key)
{
    json_object_t *object;

    /* a NULL key cannot be turned into a lookup key, see string_to_key() */
    if(!key || !json_is_object(json))
        return -1;

    object = json_to_object(json);
    return hashtable_del(&object->hashtable, string_to_key(key));
}
/* Clear the object's hashtable.  Returns 0, or -1 if json is not an
   object. */
int json_object_clear(json_t *json)
{
    if(json_is_object(json)) {
        json_object_t *obj = json_to_object(json);

        hashtable_clear(&obj->hashtable);
        return 0;
    }

    return -1;
}
/* Set every key/value pair of other in object.  Returns 0 on success,
   or -1 if either argument is not an object or a set operation fails
   (pairs set before the failure stay in object). */
int json_object_update(json_t *object, json_t *other)
{
    void *it;

    if(!json_is_object(object) || !json_is_object(other))
        return -1;

    for(it = json_object_iter(other);
        it != NULL;
        it = json_object_iter_next(other, it))
    {
        const char *name = json_object_iter_key(it);
        json_t *item = json_object_iter_value(it);

        if(json_object_set_nocheck(object, name, item))
            return -1;
    }

    return 0;
}
/* Return an iterator from hashtable_iter() for the object, or NULL if
   json is not an object. */
void *json_object_iter(json_t *json)
{
    if(json_is_object(json)) {
        json_object_t *obj = json_to_object(json);

        return hashtable_iter(&obj->hashtable);
    }

    return NULL;
}
/* Return an iterator from hashtable_iter_at() for key, or NULL if key
   is NULL or json is not an object. */
void *json_object_iter_at(json_t *json, const char *key)
{
    json_object_t *obj;

    if(key == NULL)
        return NULL;
    if(!json_is_object(json))
        return NULL;

    obj = json_to_object(json);
    return hashtable_iter_at(&obj->hashtable, string_to_key(key));
}
/* Advance iter with hashtable_iter_next().  Returns NULL if json is not
   an object or iter is NULL. */
void *json_object_iter_next(json_t *json, void *iter)
{
    json_object_t *obj;

    if(!json_is_object(json))
        return NULL;
    if(iter == NULL)
        return NULL;

    obj = json_to_object(json);
    return hashtable_iter_next(&obj->hashtable, iter);
}
/* Return the complete object_key_t at the iterator position, or NULL
   for a NULL iterator. */
const object_key_t *jsonp_object_iter_fullkey(void *iter)
{
    if(iter == NULL)
        return NULL;

    return hashtable_iter_key(iter);
}
const char *json_object_iter_key(void *iter)
{
if(!iter)
return NULL;
return jsonp_object_iter_fullkey(iter)->key;
}
/* Return the value at the iterator position, or NULL for a NULL
   iterator. */
json_t *json_object_iter_value(void *iter)
{
    void *stored;

    if(iter == NULL)
        return NULL;

    stored = hashtable_iter_value(iter);
    return (json_t *)stored;
}
/* Replace the value at the iterator position using
   hashtable_iter_set().  Returns 0, or -1 if json is not an object or
   iter or value is NULL. */
int json_object_iter_set_new(json_t *json, void *iter, json_t *value)
{
    json_object_t *obj;

    if(!json_is_object(json) || iter == NULL || value == NULL)
        return -1;

    obj = json_to_object(json);
    hashtable_iter_set(&obj->hashtable, iter, value);

    return 0;
}
/* Compare two objects: they are equal when they have the same size and
   every key of object1 maps to a value in object2 that json_equal()
   accepts.  Returns 1 if equal, 0 otherwise. */
static int json_object_equal(json_t *object1, json_t *object2)
{
    void *it;

    if(json_object_size(object1) != json_object_size(object2))
        return 0;

    for(it = json_object_iter(object1);
        it != NULL;
        it = json_object_iter_next(object1, it))
    {
        const char *name = json_object_iter_key(it);
        json_t *mine = json_object_iter_value(it);
        json_t *theirs = json_object_get(object2, name);

        if(!json_equal(mine, theirs))
            return 0;
    }

    return 1;
}
/* Shallow copy: create a new object and set each key of object in it to
   the very same value.  The results of the individual set operations
   are not checked.  Returns NULL if the new object cannot be created. */
static json_t *json_object_copy(json_t *object)
{
    void *it;
    json_t *duplicate = json_object();

    if(duplicate == NULL)
        return NULL;

    for(it = json_object_iter(object);
        it != NULL;
        it = json_object_iter_next(object, it))
    {
        const char *name = json_object_iter_key(it);
        json_t *item = json_object_iter_value(it);

        json_object_set_nocheck(duplicate, name, item);
    }

    return duplicate;
}
/* Deep copy: create a new object and set each key of object in it to a
   json_deep_copy() of the value.  The results of the individual set
   operations are not checked.  Returns NULL if the new object cannot be
   created. */
static json_t *json_object_deep_copy(json_t *object)
{
    void *it;
    json_t *duplicate = json_object();

    if(duplicate == NULL)
        return NULL;

    for(it = json_object_iter(object);
        it != NULL;
        it = json_object_iter_next(object, it))
    {
        const char *name = json_object_iter_key(it);
        json_t *item = json_object_iter_value(it);

        json_object_set_new_nocheck(duplicate, name, json_deep_copy(item));
    }

    return duplicate;
}
/*** array ***/
/* Create a new, empty JSON array with room for 8 elements.  Returns
   NULL if memory allocation fails. */
json_t *json_array(void)
{
    json_array_t *arr = malloc(sizeof(json_array_t));

    if(arr == NULL)
        return NULL;

    json_init(&arr->json, JSON_ARRAY);

    arr->size = 8;
    arr->entries = 0;

    arr->table = malloc(arr->size * sizeof(json_t *));
    if(arr->table == NULL) {
        free(arr);
        return NULL;
    }

    arr->visited = 0;

    return &arr->json;
}
/* Destroy an array: drop one reference to each element, then free the
   table and the array itself. */
static void json_delete_array(json_array_t *array)
{
    unsigned int remaining;
    json_t **slot = array->table;

    for(remaining = array->entries; remaining > 0; remaining--)
        json_decref(*slot++);

    free(array->table);
    free(array);
}
/* Return the number of elements in the array, or 0 if json is not an
   array. */
unsigned int json_array_size(const json_t *json)
{
    return json_is_array(json) ? json_to_array(json)->entries : 0;
}
/* Return the element at index, or NULL if json is not an array or index
   is out of range. */
json_t *json_array_get(const json_t *json, unsigned int index)
{
    json_array_t *arr;

    if(!json_is_array(json))
        return NULL;

    arr = json_to_array(json);

    return (index < arr->entries) ? arr->table[index] : NULL;
}
/* Replace the element at index with value, taking over the caller's
   reference to value; the old element loses one reference.

   Returns 0 on success.  Returns -1 if value is NULL, or -- after
   dropping the reference to value -- if json is not an array, value is
   the array itself or index is out of range. */
int json_array_set_new(json_t *json, unsigned int index, json_t *value)
{
    if(value == NULL)
        return -1;

    if(json_is_array(json) && json != value) {
        json_array_t *arr = json_to_array(json);

        if(index < arr->entries) {
            json_decref(arr->table[index]);
            arr->table[index] = value;
            return 0;
        }
    }

    json_decref(value);
    return -1;
}
/* Move count element pointers inside the array's table from position
   src to position dest; the ranges may overlap. */
static void array_move(json_array_t *array, unsigned int dest,
                       unsigned int src, unsigned int count)
{
    json_t **table = array->table;

    memmove(table + dest, table + src, count * sizeof(json_t *));
}
/* Copy count element pointers from src, starting at spos, to dest,
   starting at dpos.  Uses memcpy(), so the ranges must not overlap. */
static void array_copy(json_t **dest, unsigned int dpos,
                       json_t **src, unsigned int spos,
                       unsigned int count)
{
    json_t **to = dest + dpos;
    json_t **from = src + spos;

    memcpy(to, from, count * sizeof(json_t *));
}
/* Make sure the table has room for amount more elements.

   Returns NULL if a larger table is needed but cannot be allocated.
   If no growth is needed, the current table is returned.  Otherwise a
   larger table is installed in the array and:
     - with copy != 0 the existing elements are copied over, the old
       table is freed and the new table is returned;
     - with copy == 0 nothing is copied and the OLD table is returned;
       the caller has to move the elements and free it. */
static json_t **json_array_grow(json_array_t *array,
                                unsigned int amount,
                                int copy)
{
    unsigned int capacity;
    json_t **previous, **bigger;

    if(array->entries + amount <= array->size)
        return array->table;

    previous = array->table;

    /* at least double the size */
    capacity = max(array->size + amount, array->size * 2);
    bigger = malloc(capacity * sizeof(json_t *));
    if(bigger == NULL)
        return NULL;

    array->table = bigger;
    array->size = capacity;

    if(!copy)
        return previous;

    array_copy(array->table, 0, previous, 0, array->entries);
    free(previous);
    return array->table;
}
/* Append value to the end of the array, stealing the caller's reference.
 *
 * On any failure other than a NULL value, the reference to value is
 * dropped.  Returns 0 on success, -1 on error. */
int json_array_append_new(json_t *json, json_t *value)
{
    json_array_t *arr;

    if(value == NULL)
        return -1;

    if(json == value || !json_is_array(json))
        goto fail;

    arr = json_to_array(json);
    if(json_array_grow(arr, 1, 1) == NULL)
        goto fail;

    arr->table[arr->entries] = value;
    arr->entries++;
    return 0;

fail:
    json_decref(value);
    return -1;
}
/* Insert value at position index, shifting the entries at and after
 * index up by one.  index may equal the current size (append).
 *
 * Steals the caller's reference to value; on any failure other than a
 * NULL value that reference is dropped.  Returns 0 on success, -1 on
 * error. */
int json_array_insert_new(json_t *json, unsigned int index, json_t *value)
{
    json_array_t *arr;
    json_t **prev;
    unsigned int tail;

    if(value == NULL)
        return -1;

    if(json == value || !json_is_array(json))
        goto fail;

    arr = json_to_array(json);
    if(index > arr->entries)
        goto fail;

    /* grow without copying so each half is placed at its final spot */
    prev = json_array_grow(arr, 1, 0);
    if(prev == NULL)
        goto fail;

    tail = arr->entries - index;
    if(prev == arr->table) {
        /* no reallocation: open a gap in place */
        array_move(arr, index + 1, index, tail);
    }
    else {
        /* reallocated: copy head and tail around the gap, drop old table */
        array_copy(arr->table, 0, prev, 0, index);
        array_copy(arr->table, index + 1, prev, index, tail);
        free(prev);
    }

    arr->table[index] = value;
    arr->entries++;
    return 0;

fail:
    json_decref(value);
    return -1;
}
/* Remove the element at index, releasing the array's reference to it
 * and shifting the following elements down by one.
 *
 * Returns 0 on success, -1 when json is not an array or index is out
 * of range. */
int json_array_remove(json_t *json, unsigned int index)
{
    json_array_t *array;

    if(!json_is_array(json))
        return -1;
    array = json_to_array(json);

    if(index >= array->entries)
        return -1;

    json_decref(array->table[index]);

    /* Only the (entries - index - 1) elements after index have to move.
       Moving one more would read table[entries], which lies past the end
       of the allocation when the array is full.  Removing the last
       element moves nothing. */
    if(index < array->entries - 1)
        array_move(array, index, index + 1, array->entries - index - 1);

    array->entries--;
    return 0;
}
/* Remove all elements, releasing the array's reference to each one.
 * The allocated table is kept.  Returns 0 on success, -1 when json is
 * not an array. */
int json_array_clear(json_t *json)
{
    json_array_t *arr;
    unsigned int pos;

    if(!json_is_array(json))
        return -1;

    arr = json_to_array(json);
    pos = 0;
    while(pos < arr->entries) {
        json_decref(arr->table[pos]);
        pos++;
    }
    arr->entries = 0;

    return 0;
}
/* Append every element of other_json to json.  Each appended element
 * gains one reference; other_json itself is not modified.
 *
 * Returns 0 on success, -1 when either argument is not an array or the
 * table cannot be grown. */
int json_array_extend(json_t *json, json_t *other_json)
{
    json_array_t *dst, *src;
    unsigned int pos;

    if(!json_is_array(json))
        return -1;
    if(!json_is_array(other_json))
        return -1;

    dst = json_to_array(json);
    src = json_to_array(other_json);

    if(json_array_grow(dst, src->entries, 1) == NULL)
        return -1;

    /* src->table is read only after growing: dst and src may be the
       same array, in which case the table has just been replaced */
    for(pos = 0; pos < src->entries; pos++)
        json_incref(src->table[pos]);

    array_copy(dst->table, dst->entries, src->table, 0, src->entries);
    dst->entries += src->entries;

    return 0;
}
/* Two arrays are equal when they have the same length and every pair
 * of elements at the same position compares equal with json_equal().
 * Returns 1 when equal, 0 otherwise. */
static int json_array_equal(json_t *array1, json_t *array2)
{
    unsigned int pos;
    unsigned int count = json_array_size(array1);

    if(json_array_size(array2) != count)
        return 0;

    pos = 0;
    while(pos < count) {
        json_t *left = json_array_get(array1, pos);
        json_t *right = json_array_get(array2, pos);

        if(!json_equal(left, right))
            return 0;
        pos++;
    }

    return 1;
}
/* Shallow copy: a new array whose elements are the same json_t objects
 * as in the source, appended with json_array_append().  Returns NULL if
 * the new array cannot be created. */
static json_t *json_array_copy(json_t *array)
{
    unsigned int pos;
    json_t *dup = json_array();

    if(dup == NULL)
        return NULL;

    pos = 0;
    while(pos < json_array_size(array)) {
        json_array_append(dup, json_array_get(array, pos));
        pos++;
    }

    return dup;
}
/* Deep copy: a new array holding a json_deep_copy() of every element of
 * the source.  Returns NULL if the new array cannot be created. */
static json_t *json_array_deep_copy(json_t *array)
{
    unsigned int pos;
    json_t *dup = json_array();

    if(dup == NULL)
        return NULL;

    pos = 0;
    while(pos < json_array_size(array)) {
        json_t *element = json_array_get(array, pos);

        json_array_append_new(dup, json_deep_copy(element));
        pos++;
    }

    return dup;
}
/*** string ***/
json_t *json_string_nocheck(const char *value)
{
json_string_t *string;
if(!value)
return NULL;
string = malloc(sizeof(json_string_t));
if(!string)
return NULL;
json_init(&string->json, JSON_STRING);
string->value = strdup(value);
if(!string->value) {
free(string);
return NULL;
}
return &string->json;
}
/* Create a JSON string from value after checking it with
 * utf8_check_string().  Returns NULL when value is NULL, fails the
 * check, or cannot be allocated. */
json_t *json_string(const char *value)
{
    if(value == NULL)
        return NULL;

    if(!utf8_check_string(value, -1))
        return NULL;

    return json_string_nocheck(value);
}
/* Return the string's internal buffer (owned by the json_t), or NULL
 * when json is not a string. */
const char *json_string_value(const json_t *json)
{
    return json_is_string(json) ? json_to_string(json)->value : NULL;
}
/* Replace the contents of a JSON string with a copy of value, without
 * validating its encoding.
 *
 * Returns 0 on success.  Returns -1, leaving json untouched, when json
 * is not a string, value is NULL, or the copy cannot be allocated. */
int json_string_set_nocheck(json_t *json, const char *value)
{
    char *dup;
    json_string_t *string;

    /* Without this guard a non-string json would be reinterpreted as a
       json_string_t and a NULL value would be handed to strdup(). */
    if(!json_is_string(json) || !value)
        return -1;

    dup = strdup(value);
    if(!dup)
        return -1;

    string = json_to_string(json);
    free(string->value);
    string->value = dup;

    return 0;
}
/* Replace the contents of a JSON string with value after checking it
 * with utf8_check_string().  Returns -1 when value is NULL or fails the
 * check; otherwise the result of json_string_set_nocheck(). */
int json_string_set(json_t *json, const char *value)
{
    if(value == NULL)
        return -1;

    if(!utf8_check_string(value, -1))
        return -1;

    return json_string_set_nocheck(json, value);
}
/* Free a JSON string: first its character buffer, then the object. */
static void json_delete_string(json_string_t *string)
{
    char *text = string->value;

    free(text);
    free(string);
}
/* Byte-wise comparison of two JSON strings; 1 when identical, else 0. */
static int json_string_equal(json_t *string1, json_t *string2)
{
    const char *left = json_string_value(string1);
    const char *right = json_string_value(string2);

    return strcmp(left, right) == 0;
}
/* New JSON string with the same contents as the given one. */
static json_t *json_string_copy(json_t *string)
{
    const char *text = json_string_value(string);

    return json_string_nocheck(text);
}
/*** integer ***/
/* Create a JSON integer holding value; NULL on allocation failure. */
json_t *json_integer(int value)
{
    json_integer_t *num;

    num = malloc(sizeof(json_integer_t));
    if(num == NULL)
        return NULL;

    json_init(&num->json, JSON_INTEGER);
    num->value = value;

    return &num->json;
}
/* Value of a JSON integer; 0 when json is not an integer. */
int json_integer_value(const json_t *json)
{
    return json_is_integer(json) ? json_to_integer(json)->value : 0;
}
/* Store value in a JSON integer.  Returns 0 on success, -1 when json is
 * not an integer. */
int json_integer_set(json_t *json, int value)
{
    if(json_is_integer(json)) {
        json_to_integer(json)->value = value;
        return 0;
    }

    return -1;
}
/* Free a JSON integer; it owns no memory besides the object itself. */
static void json_delete_integer(json_integer_t *integer)
{
    void *mem = integer;

    free(mem);
}
/* 1 when both JSON integers hold the same value, else 0. */
static int json_integer_equal(json_t *integer1, json_t *integer2)
{
    int left = json_integer_value(integer1);
    int right = json_integer_value(integer2);

    return left == right;
}
/* New JSON integer with the same value as the given one. */
static json_t *json_integer_copy(json_t *integer)
{
    int v = json_integer_value(integer);

    return json_integer(v);
}
/*** real ***/
/* Create a JSON real holding value; NULL on allocation failure. */
json_t *json_real(double value)
{
    json_real_t *num;

    num = malloc(sizeof(json_real_t));
    if(num == NULL)
        return NULL;

    json_init(&num->json, JSON_REAL);
    num->value = value;

    return &num->json;
}
/* Value of a JSON real; 0 when json is not a real. */
double json_real_value(const json_t *json)
{
    return json_is_real(json) ? json_to_real(json)->value : 0;
}
/* Store value in a JSON real.
 *
 * Returns 0 on success and -1 when json is not a real, matching
 * json_integer_set().  The previous code returned 0 in the error case,
 * so callers could not tell that nothing had been stored. */
int json_real_set(json_t *json, double value)
{
    if(!json_is_real(json))
        return -1;

    json_to_real(json)->value = value;

    return 0;
}
/* Free a JSON real; it owns no memory besides the object itself. */
static void json_delete_real(json_real_t *real)
{
    void *mem = real;

    free(mem);
}
/* 1 when both JSON reals compare equal with ==, else 0. */
static int json_real_equal(json_t *real1, json_t *real2)
{
    double left = json_real_value(real1);
    double right = json_real_value(real2);

    return left == right;
}
/* New JSON real with the same value as the given one. */
static json_t *json_real_copy(json_t *real)
{
    double v = json_real_value(real);

    return json_real(v);
}
/*** number ***/
/* Numeric value of a JSON integer or real as a double; 0.0 for any
 * other type. */
double json_number_value(const json_t *json)
{
    if(json_is_integer(json))
        return json_integer_value(json);

    if(json_is_real(json))
        return json_real_value(json);

    return 0.0;
}
/*** simple values ***/
/* The shared JSON `true` value.  It lives in static storage and starts
 * with the largest possible reference count. */
json_t *json_true(void)
{
    static json_t singleton = {
        .refcount = (unsigned int)-1,
        .type = JSON_TRUE
    };

    return &singleton;
}
/* The shared JSON `false` value.  It lives in static storage and starts
 * with the largest possible reference count. */
json_t *json_false(void)
{
    static json_t singleton = {
        .refcount = (unsigned int)-1,
        .type = JSON_FALSE
    };

    return &singleton;
}
/* The shared JSON `null` value.  It lives in static storage and starts
 * with the largest possible reference count. */
json_t *json_null(void)
{
    static json_t singleton = {
        .refcount = (unsigned int)-1,
        .type = JSON_NULL
    };

    return &singleton;
}
/*** deletion ***/
/* Dispatch to the type-specific destructor for json.  Values of any
 * other type (true, false, null) are left alone. */
void json_delete(json_t *json)
{
    if(json_is_object(json)) {
        json_delete_object(json_to_object(json));
        return;
    }

    if(json_is_array(json)) {
        json_delete_array(json_to_array(json));
        return;
    }

    if(json_is_string(json)) {
        json_delete_string(json_to_string(json));
        return;
    }

    if(json_is_integer(json)) {
        json_delete_integer(json_to_integer(json));
        return;
    }

    if(json_is_real(json)) {
        json_delete_real(json_to_real(json));
        return;
    }

    /* json_delete is not called for true, false or null */
}
/*** equality ***/
/* Structural equality of two JSON values.
 *
 * Returns 1 when both are non-NULL, have the same type and compare
 * equal under that type's rules; 0 otherwise. */
int json_equal(json_t *json1, json_t *json2)
{
    int same = 0;

    if(json1 == NULL || json2 == NULL)
        return 0;

    if(json_typeof(json1) != json_typeof(json2))
        return 0;

    /* identical pointers: this also covers the true/false/null singletons */
    if(json1 == json2)
        return 1;

    if(json_is_object(json1))
        same = json_object_equal(json1, json2);
    else if(json_is_array(json1))
        same = json_array_equal(json1, json2);
    else if(json_is_string(json1))
        same = json_string_equal(json1, json2);
    else if(json_is_integer(json1))
        same = json_integer_equal(json1, json2);
    else if(json_is_real(json1))
        same = json_real_equal(json1, json2);

    return same;
}
/*** copying ***/
/* Shallow copy of a JSON value.
 *
 * Containers get a new container via their type-specific copy routine;
 * strings and numbers are duplicated; true, false and null are returned
 * as-is.  Returns NULL for a NULL argument or an unrecognised type. */
json_t *json_copy(json_t *json)
{
    if(json == NULL)
        return NULL;

    if(json_is_object(json))
        return json_object_copy(json);
    else if(json_is_array(json))
        return json_array_copy(json);
    else if(json_is_string(json))
        return json_string_copy(json);
    else if(json_is_integer(json))
        return json_integer_copy(json);
    else if(json_is_real(json))
        return json_real_copy(json);
    else if(json_is_true(json) || json_is_false(json) || json_is_null(json))
        return json;

    return NULL;
}
/* Deep copy of a JSON value.
 *
 * Objects and arrays are copied through their deep-copy routines.
 * Strings and numbers are duplicated; true, false and null are returned
 * as-is.  Returns NULL for a NULL argument or an unrecognised type. */
json_t *json_deep_copy(json_t *json)
{
    if(json == NULL)
        return NULL;

    if(json_is_object(json))
        return json_object_deep_copy(json);
    else if(json_is_array(json))
        return json_array_deep_copy(json);

    /* for the rest of the types, deep copying doesn't differ from
       shallow copying */
    else if(json_is_string(json))
        return json_string_copy(json);
    else if(json_is_integer(json))
        return json_integer_copy(json);
    else if(json_is_real(json))
        return json_real_copy(json);
    else if(json_is_true(json) || json_is_false(json) || json_is_null(json))
        return json;

    return NULL;
}
0707010000001C000081A4000003E800000064000000015EF4BCA100000E55000000000000000000000000000000000000001C00000000cpuminer-2.5.1/configure.acAC_INIT([cpuminer], [2.5.1])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
AC_CONFIG_SRCDIR([cpu-miner.c])
AM_INIT_AUTOMAKE([gnu])
AC_CONFIG_HEADERS([cpuminer-config.h])
dnl Make sure anyone changing configure.ac/Makefile.am has a clue
AM_MAINTAINER_MODE
dnl Checks for programs
AC_PROG_CC_C99
AC_PROG_GCC_TRADITIONAL
AM_PROG_CC_C_O
AM_PROG_AS
AC_PROG_RANLIB
dnl Checks for header files
AC_HEADER_STDC
AC_CHECK_HEADERS([sys/endian.h sys/param.h syslog.h])
# sys/sysctl.h requires sys/types.h on FreeBSD
# sys/sysctl.h requires sys/param.h on OpenBSD
AC_CHECK_HEADERS([sys/sysctl.h], [], [],
[#include <sys/types.h>
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
])
dnl Check whether the endian conversion helpers are already declared
AC_CHECK_DECLS([be32dec, le32dec, be32enc, le32enc], [], [],
[AC_INCLUDES_DEFAULT
#ifdef HAVE_SYS_ENDIAN_H
#include <sys/endian.h>
#endif
])
AC_FUNC_ALLOCA
AC_CHECK_FUNCS([getopt_long])
dnl Record the target CPU family; used for the ARCH_* conditionals below
case $target in
i*86-*-*)
have_x86=true
;;
x86_64-*-*|amd64-*-*)
have_x86_64=true
;;
arm*-*-*)
have_arm=true
;;
powerpc*-*-*)
have_ppc=true
;;
esac
dnl Thread and socket flags: MinGW targets clear -pthread and link ws2_32
PTHREAD_FLAGS="-pthread"
WS2_LIBS=""
case $target in
*-*-mingw*)
have_win32=true
PTHREAD_FLAGS=""
WS2_LIBS="-lws2_32"
;;
esac
dnl Assembly routines are enabled unless --disable-assembly is given
AC_ARG_ENABLE([assembly],
AS_HELP_STRING([--disable-assembly], [disable assembly-language routines]))
if test x$enable_assembly != xno; then
AC_DEFINE([USE_ASM], [1], [Define to 1 if assembly routines are wanted.])
fi
dnl On x86-64, probe the assembler for AVX; the XOP and AVX2 probes are
dnl nested inside the AVX success branch, so they run only if AVX compiles
if test x$enable_assembly != xno -a x$have_x86_64 = xtrue
then
AC_MSG_CHECKING(whether we can compile AVX code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vmovdqa %ymm0, %ymm1");])],
AC_DEFINE(USE_AVX, 1, [Define to 1 if AVX assembly is available.])
AC_MSG_RESULT(yes)
AC_MSG_CHECKING(whether we can compile XOP code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vprotd \$7, %xmm0, %xmm1");])],
AC_DEFINE(USE_XOP, 1, [Define to 1 if XOP assembly is available.])
AC_MSG_RESULT(yes)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the XOP instruction set.])
)
AC_MSG_CHECKING(whether we can compile AVX2 code)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpaddd %ymm0, %ymm1, %ymm2");])],
AC_DEFINE(USE_AVX2, 1, [Define to 1 if AVX2 assembly is available.])
AC_MSG_RESULT(yes)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the AVX2 instruction set.])
)
,
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the AVX instruction set.])
)
fi
dnl Use the system jansson when it provides json_loads; otherwise request
dnl the bundled copy under compat/jansson
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
dnl Find a library providing pthread_create, trying the pthreadGC* names
dnl as fallbacks
AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS="-lpthread",
AC_CHECK_LIB([pthreadGC2], [pthread_create], PTHREAD_LIBS="-lpthreadGC2",
AC_CHECK_LIB([pthreadGC1], [pthread_create], PTHREAD_LIBS="-lpthreadGC1",
AC_CHECK_LIB([pthreadGC], [pthread_create], PTHREAD_LIBS="-lpthreadGC"
))))
dnl Automake conditionals derived from the checks above
AM_CONDITIONAL([WANT_JANSSON], [test x$request_jansson = xtrue])
AM_CONDITIONAL([HAVE_WINDOWS], [test x$have_win32 = xtrue])
AM_CONDITIONAL([USE_ASM], [test x$enable_assembly != xno])
AM_CONDITIONAL([ARCH_x86], [test x$have_x86 = xtrue])
AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])
AM_CONDITIONAL([ARCH_ARM], [test x$have_arm = xtrue])
AM_CONDITIONAL([ARCH_PPC], [test x$have_ppc = xtrue])
dnl Link against the bundled static jansson or the system library
if test x$request_jansson = xtrue
then
JANSSON_LIBS="compat/jansson/libjansson.a"
else
JANSSON_LIBS=-ljansson
fi
dnl libcurl is mandatory; configure aborts when it is older than 7.15.2
LIBCURL_CHECK_CONFIG(, 7.15.2, ,
[AC_MSG_ERROR([Missing required libcurl >= 7.15.2])])
dnl Export the computed variables to the generated Makefiles
AC_SUBST(JANSSON_LIBS)
AC_SUBST(PTHREAD_FLAGS)
AC_SUBST(PTHREAD_LIBS)
AC_SUBST(WS2_LIBS)
AC_CONFIG_FILES([
Makefile
compat/Makefile
compat/jansson/Makefile
])
AC_OUTPUT
0707010000001D000081A4000003E800000064000000015EF4BCA10000C4D2000000000000000000000000000000000000001B00000000cpuminer-2.5.1/cpu-miner.c/*
* Copyright 2010 Jeff Garzik
* Copyright 2012-2017 pooler
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <inttypes.h>
#include <unistd.h>
#include <sys/time.h>
#include <time.h>
#ifdef WIN32
#include <windows.h>
#else
#include <errno.h>
#include <signal.h>
#include <sys/resource.h>
#if HAVE_SYS_SYSCTL_H
#include <sys/types.h>
#if HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#include <sys/sysctl.h>
#endif
#endif
#include <jansson.h>
#include <curl/curl.h>
#include "compat.h"
#include "miner.h"
#define PROGRAM_NAME "minerd"
#define LP_SCANTIME 60
#ifdef __linux /* Linux specific policy and affinity management */
#include <sched.h>
/*
 * Lower the scheduling policy of the calling thread.
 *
 * Tries SCHED_IDLE first and falls back to SCHED_BATCH when that call
 * fails or SCHED_IDLE is not defined.  Failures are ignored.
 *
 * Written as two independent preprocessor sections so the function
 * still compiles when only one of the two policies is defined.
 */
static inline void drop_policy(void)
{
	struct sched_param param;

	param.sched_priority = 0;

#ifdef SCHED_IDLE
	if (sched_setscheduler(0, SCHED_IDLE, &param) != -1)
		return;
#endif
#ifdef SCHED_BATCH
	sched_setscheduler(0, SCHED_BATCH, &param);
#endif
}
/*
 * Restrict the calling thread to the single CPU `cpu`.  The `id`
 * argument is not used here.  The result of sched_setaffinity() is
 * ignored.
 */
static inline void affine_to_cpu(int id, int cpu)
{
	cpu_set_t mask;

	(void)id;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	sched_setaffinity(0, sizeof(mask), &mask);
}
#elif defined(__FreeBSD__) /* FreeBSD specific policy and affinity management */
#include <sys/cpuset.h>
/* FreeBSD build: the scheduling policy is left unchanged. */
static inline void drop_policy(void)
{
	/* intentionally empty */
}
/*
 * FreeBSD build: restrict the calling thread to the single CPU `cpu`
 * via cpuset_setaffinity().  The `id` argument is not used here and
 * the call's result is ignored.
 */
static inline void affine_to_cpu(int id, int cpu)
{
	cpuset_t mask;

	(void)id;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset_t), &mask);
}
#else
/* Fallback for platforms without a policy implementation: no-op. */
static inline void drop_policy(void)
{
	/* intentionally empty */
}
/* Fallback for platforms without an affinity implementation: no-op. */
static inline void affine_to_cpu(int id, int cpu)
{
	(void)id;
	(void)cpu;
}
#endif
/* Kinds of request carried by a struct workio_cmd. */
enum workio_commands {
WC_GET_WORK,
WC_SUBMIT_WORK,
};
/* A work I/O request: the command, a thread descriptor and, in the
 * union, a pointer to the work item the command refers to. */
struct workio_cmd {
enum workio_commands cmd;
struct thr_info *thr;
union {
struct work *work;
} u;
};
/* Supported hashing algorithms; the values index algo_names[]. */
enum algos {
ALGO_SCRYPT, /* scrypt(1024,1,1) */
ALGO_SHA256D, /* SHA-256d */
};
/* Name of each algorithm, indexed by enum algos. */
static const char *algo_names[] = {
[ALGO_SCRYPT] = "scrypt",
[ALGO_SHA256D] = "sha256d",
};
/* Feature switches and their defaults. */
bool opt_debug = false;
bool opt_protocol = false;
static bool opt_benchmark = false;
bool opt_redirect = true;
bool want_longpoll = true;
bool have_longpoll = false;
bool have_gbt = true;
bool allow_getwork = true;
bool want_stratum = true;
bool have_stratum = false;
bool use_syslog = false;
static bool opt_background = false;
static bool opt_quiet = false;
/* Numeric settings; the usage text documents retry-pause 30 and
 * scantime 5 as the defaults. */
static int opt_retries = -1;
static int opt_fail_pause = 30;
int opt_timeout = 0;
static int opt_scantime = 5;
static enum algos opt_algo = ALGO_SCRYPT;
static int opt_scrypt_n = 1024;
static int opt_n_threads;
static int num_processors;
/* Mining server URL and credentials. */
static char *rpc_url;
static char *rpc_userpass;
static char *rpc_user, *rpc_pass;
/* Payout script and extra signature text that gbt_work_decode() places
 * in a coinbase transaction it builds itself. */
static int pk_script_size;
static unsigned char pk_script[42];
static char coinbase_sig[101] = "";
char *opt_cert;
char *opt_proxy;
long opt_proxy_type;
/* Thread bookkeeping. */
struct thr_info *thr_info;
static int work_thr_id;
int longpoll_thr_id = -1;
int stratum_thr_id = -1;
struct work_restart *work_restart = NULL;
static struct stratum_ctx stratum;
pthread_mutex_t applog_lock;
/* stats_lock is held by share_result() while it sums thr_hashrates and
 * bumps the accepted/rejected counters. */
static pthread_mutex_t stats_lock;
static unsigned long accepted_count = 0L;
static unsigned long rejected_count = 0L;
static double *thr_hashrates;
/* Use the system's <getopt.h> when getopt_long() is available; otherwise
 * declare a local struct option so the options[] table still compiles. */
#ifdef HAVE_GETOPT_LONG
#include <getopt.h>
#else
struct option {
const char *name;
int has_arg;
int *flag;
int val;
};
#endif
/* Help text.  The --syslog and --background entries are compiled in
 * only when HAVE_SYSLOG_H is defined and WIN32 is not, respectively. */
static char const usage[] = "\
Usage: " PROGRAM_NAME " [OPTIONS]\n\
Options:\n\
-a, --algo=ALGO specify the algorithm to use\n\
scrypt scrypt(1024, 1, 1) (default)\n\
scrypt:N scrypt(N, 1, 1)\n\
sha256d SHA-256d\n\
-o, --url=URL URL of mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\
-u, --user=USERNAME username for mining server\n\
-p, --pass=PASSWORD password for mining server\n\
--cert=FILE certificate for mining server using SSL\n\
-x, --proxy=[PROTOCOL://]HOST[:PORT] connect through a proxy\n\
-t, --threads=N number of miner threads (default: number of processors)\n\
-r, --retries=N number of times to retry if a network call fails\n\
(default: retry indefinitely)\n\
-R, --retry-pause=N time to pause between retries, in seconds (default: 30)\n\
-T, --timeout=N timeout for long polling, in seconds (default: none)\n\
-s, --scantime=N upper bound on time spent scanning current work when\n\
long polling is unavailable, in seconds (default: 5)\n\
--coinbase-addr=ADDR payout address for solo mining\n\
--coinbase-sig=TEXT data to insert in the coinbase when possible\n\
--no-longpoll disable long polling support\n\
--no-getwork disable getwork support\n\
--no-gbt disable getblocktemplate support\n\
--no-stratum disable X-Stratum support\n\
--no-redirect ignore requests to change the URL of the mining server\n\
-q, --quiet disable per-thread hashmeter output\n\
-D, --debug enable debug output\n\
-P, --protocol-dump verbose dump of protocol-level activities\n"
#ifdef HAVE_SYSLOG_H
"\
-S, --syslog use system log for output messages\n"
#endif
#ifndef WIN32
"\
-B, --background run the miner in the background\n"
#endif
"\
--benchmark run in offline benchmark mode\n\
-c, --config=FILE load a JSON-format configuration file\n\
-V, --version display version information and exit\n\
-h, --help display this help text and exit\n\
";
/* Short option letters in getopt() syntax (a trailing ':' means the
 * option takes an argument).  'B' and 'S' are present only on builds
 * that support them. */
static char const short_options[] =
#ifndef WIN32
"B"
#endif
#ifdef HAVE_SYSLOG_H
"S"
#endif
"a:c:Dhp:Px:qr:R:s:t:T:o:u:O:V";
/* Long option table.  Entries whose val is a character share a letter
 * with short_options; the numeric codes (1001 and up) have no
 * single-letter form.  The all-zero entry terminates the table. */
static struct option const options[] = {
{ "algo", 1, NULL, 'a' },
#ifndef WIN32
{ "background", 0, NULL, 'B' },
#endif
{ "benchmark", 0, NULL, 1005 },
{ "cert", 1, NULL, 1001 },
{ "coinbase-addr", 1, NULL, 1013 },
{ "coinbase-sig", 1, NULL, 1015 },
{ "config", 1, NULL, 'c' },
{ "debug", 0, NULL, 'D' },
{ "help", 0, NULL, 'h' },
{ "no-gbt", 0, NULL, 1011 },
{ "no-getwork", 0, NULL, 1010 },
{ "no-longpoll", 0, NULL, 1003 },
{ "no-redirect", 0, NULL, 1009 },
{ "no-stratum", 0, NULL, 1007 },
{ "pass", 1, NULL, 'p' },
{ "protocol-dump", 0, NULL, 'P' },
{ "proxy", 1, NULL, 'x' },
{ "quiet", 0, NULL, 'q' },
{ "retries", 1, NULL, 'r' },
{ "retry-pause", 1, NULL, 'R' },
{ "scantime", 1, NULL, 's' },
#ifdef HAVE_SYSLOG_H
{ "syslog", 0, NULL, 'S' },
#endif
{ "threads", 1, NULL, 't' },
{ "timeout", 1, NULL, 'T' },
{ "url", 1, NULL, 'o' },
{ "user", 1, NULL, 'u' },
{ "userpass", 1, NULL, 'O' },
{ "version", 0, NULL, 'V' },
{ 0, 0, 0, 0 }
};
/* One unit of mining work.  The four pointer members are heap-owned:
 * work_free() frees them and work_copy() duplicates them. */
struct work {
uint32_t data[32]; /* header words; decoded from "data" or assembled from a block template */
uint32_t target[8]; /* target words */
int height; /* block height taken from a block template */
char *txs; /* hex string: tx count, coinbase and transactions (block template only) */
char *workid; /* "workid" from a block template, if the server sent one */
char *job_id; /* job identifier sent back in mining.submit */
size_t xnonce2_len; /* length of xnonce2 in bytes */
unsigned char *xnonce2; /* extranonce2 bytes sent back in mining.submit */
};
/* Shared current-work state. */
static struct work g_work;
static time_t g_work_time;
static pthread_mutex_t g_work_lock;
/* When false, submit_upstream_work() drops work whose previous-block
 * hash differs from g_work's. */
static bool submit_old = false;
/* Most recent "longpollid" received in a block template. */
static char *lp_id;
/*
 * Release the heap buffers owned by a work item.  The struct itself is
 * not freed and its pointers are not reset.
 */
static inline void work_free(struct work *w)
{
	free(w->xnonce2);
	free(w->job_id);
	free(w->workid);
	free(w->txs);
}
/*
 * Deep-copy a work item.
 *
 * The plain fields are copied wholesale, then each heap-owned member of
 * src is duplicated so that dest can be passed to work_free()
 * independently of src.  If a string duplication fails, the
 * corresponding member of dest is NULL.  If the xnonce2 buffer cannot
 * be allocated, dest->xnonce2 is NULL and dest->xnonce2_len is 0, so
 * dest never aliases src's buffer and never claims bytes it lacks.
 */
static inline void work_copy(struct work *dest, const struct work *src)
{
	memcpy(dest, src, sizeof(struct work));
	if (src->txs)
		dest->txs = strdup(src->txs);
	if (src->workid)
		dest->workid = strdup(src->workid);
	if (src->job_id)
		dest->job_id = strdup(src->job_id);
	if (src->xnonce2) {
		dest->xnonce2 = malloc(src->xnonce2_len);
		if (dest->xnonce2)
			memcpy(dest->xnonce2, src->xnonce2, src->xnonce2_len);
		else
			dest->xnonce2_len = 0;
	}
}
/*
 * Look up `key` in JSON object `obj` and hex-decode its string value
 * into `buf` (`buflen` bytes).
 *
 * Logs an error and returns false when the key is missing or its value
 * is not a string; returns false without logging when hex2bin() fails.
 */
static bool jobj_binary(const json_t *obj, const char *key,
			void *buf, size_t buflen)
{
	json_t *item = json_object_get(obj, key);
	const char *hex;

	if (unlikely(!item)) {
		applog(LOG_ERR, "JSON key '%s' not found", key);
		return false;
	}

	hex = json_string_value(item);
	if (unlikely(!hex)) {
		applog(LOG_ERR, "JSON key '%s' is not a string", key);
		return false;
	}

	return hex2bin(buf, hex, buflen) ? true : false;
}
/*
 * Fill work->data and work->target from the hex strings stored under
 * "data" and "target" in `val`, then convert every 32-bit word in place
 * with le32dec().
 *
 * Returns false (after logging) when either field is missing or invalid.
 */
static bool work_decode(const json_t *val, struct work *work)
{
	int w;

	if (unlikely(!jobj_binary(val, "data", work->data, sizeof(work->data)))) {
		applog(LOG_ERR, "JSON invalid data");
		return false;
	}

	if (unlikely(!jobj_binary(val, "target", work->target, sizeof(work->target)))) {
		applog(LOG_ERR, "JSON invalid target");
		return false;
	}

	for (w = 0; w < ARRAY_SIZE(work->data); w++)
		work->data[w] = le32dec(work->data + w);

	for (w = 0; w < ARRAY_SIZE(work->target); w++)
		work->target[w] = le32dec(work->target + w);

	return true;
}
/*
 * Turn a getblocktemplate result (`val`) into a work item.
 *
 * Steps:
 *   1. read the "rules" and "mutable" arrays (segwit, coinbase/append,
 *      submit/coinbase);
 *   2. read height, version, previousblockhash, curtime and bits;
 *   3. take the coinbase transaction from "coinbasetxn" or, when that
 *      is absent, build one that pays "coinbasevalue" to pk_script
 *      (plus a witness commitment output when segwit is active);
 *   4. optionally append coinbase_sig and "coinbaseaux" data to the
 *      coinbase scriptsig;
 *   5. build work->txs (hex of tx count + coinbase [+ transactions]) and
 *      the merkle root, then assemble the header in work->data and the
 *      target in work->target;
 *   6. record "workid" and set up long polling from "longpollid".
 *
 * Returns true on success.  On failure an error is logged and false is
 * returned; work may have been partially filled in (including
 * work->txs), which is left for the caller to release.
 *
 * Every allocation is checked before use; a failed allocation is
 * reported as "Out of memory" and treated like any other decode error.
 */
static bool gbt_work_decode(const json_t *val, struct work *work)
{
	int i, n;
	uint32_t version, curtime, bits;
	uint32_t prevhash[8];
	uint32_t target[8];
	int cbtx_size;
	unsigned char *cbtx = NULL;
	int tx_count, tx_size;
	unsigned char txc_vi[9];
	unsigned char (*merkle_tree)[32] = NULL;
	bool coinbase_append = false;
	bool submit_coinbase = false;
	bool segwit = false;
	json_t *tmp, *txa;
	bool rc = false;

	tmp = json_object_get(val, "rules");
	if (tmp && json_is_array(tmp)) {
		n = json_array_size(tmp);
		for (i = 0; i < n; i++) {
			const char *s = json_string_value(json_array_get(tmp, i));
			if (!s)
				continue;
			if (!strcmp(s, "segwit") || !strcmp(s, "!segwit"))
				segwit = true;
		}
	}

	tmp = json_object_get(val, "mutable");
	if (tmp && json_is_array(tmp)) {
		n = json_array_size(tmp);
		for (i = 0; i < n; i++) {
			const char *s = json_string_value(json_array_get(tmp, i));
			if (!s)
				continue;
			if (!strcmp(s, "coinbase/append"))
				coinbase_append = true;
			else if (!strcmp(s, "submit/coinbase"))
				submit_coinbase = true;
		}
	}

	tmp = json_object_get(val, "height");
	if (!tmp || !json_is_integer(tmp)) {
		applog(LOG_ERR, "JSON invalid height");
		goto out;
	}
	work->height = json_integer_value(tmp);

	tmp = json_object_get(val, "version");
	if (!tmp || !json_is_integer(tmp)) {
		applog(LOG_ERR, "JSON invalid version");
		goto out;
	}
	version = json_integer_value(tmp);

	if (unlikely(!jobj_binary(val, "previousblockhash", prevhash, sizeof(prevhash)))) {
		applog(LOG_ERR, "JSON invalid previousblockhash");
		goto out;
	}

	tmp = json_object_get(val, "curtime");
	if (!tmp || !json_is_integer(tmp)) {
		applog(LOG_ERR, "JSON invalid curtime");
		goto out;
	}
	curtime = json_integer_value(tmp);

	if (unlikely(!jobj_binary(val, "bits", &bits, sizeof(bits)))) {
		applog(LOG_ERR, "JSON invalid bits");
		goto out;
	}

	/* find count and size of transactions */
	txa = json_object_get(val, "transactions");
	if (!txa || !json_is_array(txa)) {
		applog(LOG_ERR, "JSON invalid transactions");
		goto out;
	}
	tx_count = json_array_size(txa);
	tx_size = 0;
	for (i = 0; i < tx_count; i++) {
		const json_t *tx = json_array_get(txa, i);
		const char *tx_hex = json_string_value(json_object_get(tx, "data"));
		if (!tx_hex) {
			applog(LOG_ERR, "JSON invalid transactions");
			goto out;
		}
		tx_size += strlen(tx_hex) / 2;
	}

	/* build coinbase transaction */
	tmp = json_object_get(val, "coinbasetxn");
	if (tmp) {
		const char *cbtx_hex = json_string_value(json_object_get(tmp, "data"));
		cbtx_size = cbtx_hex ? strlen(cbtx_hex) / 2 : 0;
		/* 100 spare bytes leave room for data appended to the scriptsig */
		cbtx = malloc(cbtx_size + 100);
		if (!cbtx) {
			applog(LOG_ERR, "Out of memory");
			goto out;
		}
		if (cbtx_size < 60 || !hex2bin(cbtx, cbtx_hex, cbtx_size)) {
			applog(LOG_ERR, "JSON invalid coinbasetxn");
			goto out;
		}
	} else {
		int64_t cbvalue;
		if (!pk_script_size) {
			if (allow_getwork) {
				applog(LOG_INFO, "No payout address provided, switching to getwork");
				have_gbt = false;
			} else
				applog(LOG_ERR, "No payout address provided");
			goto out;
		}
		tmp = json_object_get(val, "coinbasevalue");
		if (!tmp || !json_is_number(tmp)) {
			applog(LOG_ERR, "JSON invalid coinbasevalue");
			goto out;
		}
		cbvalue = json_is_integer(tmp) ? json_integer_value(tmp) : json_number_value(tmp);
		cbtx = malloc(256);
		if (!cbtx) {
			applog(LOG_ERR, "Out of memory");
			goto out;
		}
		le32enc((uint32_t *)cbtx, 1); /* version */
		cbtx[4] = 1; /* in-counter */
		memset(cbtx+5, 0x00, 32); /* prev txout hash */
		le32enc((uint32_t *)(cbtx+37), 0xffffffff); /* prev txout index */
		cbtx_size = 43;
		/* BIP 34: height in coinbase */
		for (n = work->height; n; n >>= 8) {
			cbtx[cbtx_size++] = n & 0xff;
			if (n < 0x100 && n >= 0x80)
				cbtx[cbtx_size++] = 0;
		}
		cbtx[42] = cbtx_size - 43;
		cbtx[41] = cbtx_size - 42; /* scriptsig length */
		le32enc((uint32_t *)(cbtx+cbtx_size), 0xffffffff); /* sequence */
		cbtx_size += 4;
		cbtx[cbtx_size++] = segwit ? 2 : 1; /* out-counter */
		le32enc((uint32_t *)(cbtx+cbtx_size), (uint32_t)cbvalue); /* value */
		le32enc((uint32_t *)(cbtx+cbtx_size+4), cbvalue >> 32);
		cbtx_size += 8;
		cbtx[cbtx_size++] = pk_script_size; /* txout-script length */
		memcpy(cbtx+cbtx_size, pk_script, pk_script_size);
		cbtx_size += pk_script_size;
		if (segwit) {
			/* second output: commitment to the witness merkle root */
			unsigned char (*wtree)[32] = calloc(tx_count + 2, 32);
			if (!wtree) {
				applog(LOG_ERR, "Out of memory");
				goto out;
			}
			memset(cbtx+cbtx_size, 0, 8); /* value */
			cbtx_size += 8;
			cbtx[cbtx_size++] = 38; /* txout-script length */
			cbtx[cbtx_size++] = 0x6a; /* txout-script */
			cbtx[cbtx_size++] = 0x24;
			cbtx[cbtx_size++] = 0xaa;
			cbtx[cbtx_size++] = 0x21;
			cbtx[cbtx_size++] = 0xa9;
			cbtx[cbtx_size++] = 0xed;
			for (i = 0; i < tx_count; i++) {
				const json_t *tx = json_array_get(txa, i);
				const json_t *hash = json_object_get(tx, "hash");
				if (!hash || !hex2bin(wtree[1+i], json_string_value(hash), 32)) {
					applog(LOG_ERR, "JSON invalid transaction hash");
					free(wtree);
					goto out;
				}
				memrev(wtree[1+i], 32);
			}
			n = tx_count + 1;
			while (n > 1) {
				if (n % 2)
					memcpy(wtree[n], wtree[n-1], 32);
				n = (n + 1) / 2;
				for (i = 0; i < n; i++)
					sha256d(wtree[i], wtree[2*i], 64);
			}
			memset(wtree[1], 0, 32); /* witness reserved value = 0 */
			sha256d(cbtx+cbtx_size, wtree[0], 64);
			cbtx_size += 32;
			free(wtree);
		}
		le32enc((uint32_t *)(cbtx+cbtx_size), 0); /* lock time */
		cbtx_size += 4;
		coinbase_append = true;
	}

	if (coinbase_append) {
		unsigned char xsig[100];
		int xsig_len = 0;
		if (*coinbase_sig) {
			n = strlen(coinbase_sig);
			if (cbtx[41] + xsig_len + n <= 100) {
				memcpy(xsig+xsig_len, coinbase_sig, n);
				xsig_len += n;
			} else {
				applog(LOG_WARNING, "Signature does not fit in coinbase, skipping");
			}
		}
		tmp = json_object_get(val, "coinbaseaux");
		if (tmp && json_is_object(tmp)) {
			void *iter = json_object_iter(tmp);
			while (iter) {
				unsigned char buf[100];
				const char *s = json_string_value(json_object_iter_value(iter));
				n = s ? strlen(s) / 2 : 0;
				if (!s || n > 100 || !hex2bin(buf, s, n)) {
					applog(LOG_ERR, "JSON invalid coinbaseaux");
					break;
				}
				if (cbtx[41] + xsig_len + n <= 100) {
					memcpy(xsig+xsig_len, buf, n);
					xsig_len += n;
				}
				iter = json_object_iter_next(tmp, iter);
			}
		}
		if (xsig_len) {
			/* splice the extra data in right after the existing scriptsig */
			unsigned char *ssig_end = cbtx + 42 + cbtx[41];
			int push_len = cbtx[41] + xsig_len < 76 ? 1 :
			               cbtx[41] + 2 + xsig_len > 100 ? 0 : 2;
			n = xsig_len + push_len;
			memmove(ssig_end + n, ssig_end, cbtx_size - 42 - cbtx[41]);
			cbtx[41] += n;
			if (push_len == 2)
				*(ssig_end++) = 0x4c; /* OP_PUSHDATA1 */
			if (push_len)
				*(ssig_end++) = xsig_len;
			memcpy(ssig_end, xsig, xsig_len);
			cbtx_size += n;
		}
	}

	n = varint_encode(txc_vi, 1 + tx_count);
	work->txs = malloc(2 * (n + cbtx_size + tx_size) + 1);
	if (!work->txs) {
		applog(LOG_ERR, "Out of memory");
		goto out;
	}
	bin2hex(work->txs, txc_vi, n);
	bin2hex(work->txs + 2*n, cbtx, cbtx_size);

	/* generate merkle root */
	merkle_tree = malloc(32 * ((1 + tx_count + 1) & ~1));
	if (!merkle_tree) {
		applog(LOG_ERR, "Out of memory");
		goto out;
	}
	sha256d(merkle_tree[0], cbtx, cbtx_size);
	for (i = 0; i < tx_count; i++) {
		tmp = json_array_get(txa, i);
		const char *tx_hex = json_string_value(json_object_get(tmp, "data"));
		const int tx_len = tx_hex ? strlen(tx_hex) / 2 : 0;
		if (segwit) {
			const char *txid = json_string_value(json_object_get(tmp, "txid"));
			if (!txid || !hex2bin(merkle_tree[1 + i], txid, 32)) {
				applog(LOG_ERR, "JSON invalid transaction txid");
				goto out;
			}
			memrev(merkle_tree[1 + i], 32);
		} else {
			unsigned char *tx = malloc(tx_len);
			/* malloc(0) may legitimately return NULL */
			if (!tx && tx_len > 0) {
				applog(LOG_ERR, "Out of memory");
				goto out;
			}
			if (!tx_hex || !hex2bin(tx, tx_hex, tx_len)) {
				applog(LOG_ERR, "JSON invalid transactions");
				free(tx);
				goto out;
			}
			sha256d(merkle_tree[1 + i], tx, tx_len);
			free(tx);
		}
		if (!submit_coinbase)
			strcat(work->txs, tx_hex);
	}
	n = 1 + tx_count;
	while (n > 1) {
		if (n % 2) {
			memcpy(merkle_tree[n], merkle_tree[n-1], 32);
			++n;
		}
		n /= 2;
		for (i = 0; i < n; i++)
			sha256d(merkle_tree[i], merkle_tree[2*i], 64);
	}

	/* assemble block header */
	work->data[0] = swab32(version);
	for (i = 0; i < 8; i++)
		work->data[8 - i] = le32dec(prevhash + i);
	for (i = 0; i < 8; i++)
		work->data[9 + i] = be32dec((uint32_t *)merkle_tree[0] + i);
	work->data[17] = swab32(curtime);
	work->data[18] = le32dec(&bits);
	memset(work->data + 19, 0x00, 52);
	work->data[20] = 0x80000000;
	work->data[31] = 0x00000280;

	if (unlikely(!jobj_binary(val, "target", target, sizeof(target)))) {
		applog(LOG_ERR, "JSON invalid target");
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(work->target); i++)
		work->target[7 - i] = be32dec(target + i);

	tmp = json_object_get(val, "workid");
	if (tmp) {
		if (!json_is_string(tmp)) {
			applog(LOG_ERR, "JSON invalid workid");
			goto out;
		}
		work->workid = strdup(json_string_value(tmp));
	}

	/* Long polling */
	tmp = json_object_get(val, "longpollid");
	if (want_longpoll && json_is_string(tmp)) {
		free(lp_id);
		lp_id = strdup(json_string_value(tmp));
		if (!have_longpoll) {
			char *lp_uri;
			tmp = json_object_get(val, "longpolluri");
			lp_uri = strdup(json_is_string(tmp) ? json_string_value(tmp) : rpc_url);
			have_longpoll = true;
			tq_push(thr_info[longpoll_thr_id].q, lp_uri);
		}
	}

	rc = true;

out:
	free(cbtx);
	free(merkle_tree);
	return rc;
}
static void share_result(int result, const char *reason)
{
char s[345];
double hashrate;
int i;
hashrate = 0.;
pthread_mutex_lock(&stats_lock);
for (i = 0; i < opt_n_threads; i++)
hashrate += thr_hashrates[i];
result ? accepted_count++ : rejected_count++;
pthread_mutex_unlock(&stats_lock);
sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * hashrate);
applog(LOG_INFO, "accepted: %lu/%lu (%.2f%%), %s khash/s %s",
accepted_count,
accepted_count + rejected_count,
100. * accepted_count / (accepted_count + rejected_count),
s,
result ? "(yay!!!)" : "(booooo)");
if (opt_debug && reason)
applog(LOG_DEBUG, "DEBUG: reject reason: %s", reason);
}
/*
 * Submit a solved work item to the server.
 *
 * Work whose previous-block hash differs from g_work's is silently
 * dropped (returning true) unless submit_old is set.  Otherwise one of
 * three paths is taken:
 *   - stratum:  send a "mining.submit" line;
 *   - work->txs set (block template): send "submitblock", with a workid
 *     parameter object when work->workid is set;
 *   - otherwise: send the header back with "getwork".
 * The two RPC paths report the outcome through share_result().
 *
 * Returns true when the submission was sent (or skipped as stale) and
 * false on a transport or allocation failure.  Every heap allocation is
 * checked before it is written to.
 */
static bool submit_upstream_work(CURL *curl, struct work *work)
{
	json_t *val, *res, *reason;
	char data_str[2 * sizeof(work->data) + 1];
	char s[345];
	int i;
	bool rc = false;

	/* pass if the previous hash is not the current previous hash */
	if (!submit_old && memcmp(work->data + 1, g_work.data + 1, 32)) {
		if (opt_debug)
			applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
		return true;
	}

	if (have_stratum) {
		uint32_t ntime, nonce;
		char ntimestr[9], noncestr[9], *xnonce2str, *req;

		le32enc(&ntime, work->data[17]);
		le32enc(&nonce, work->data[19]);
		bin2hex(ntimestr, (const unsigned char *)(&ntime), 4);
		bin2hex(noncestr, (const unsigned char *)(&nonce), 4);
		xnonce2str = abin2hex(work->xnonce2, work->xnonce2_len);
		req = malloc(256 + strlen(rpc_user) + strlen(work->job_id) + 2 * work->xnonce2_len);
		if (unlikely(!xnonce2str || !req)) {
			applog(LOG_ERR, "submit_upstream_work out of memory");
			free(xnonce2str);
			free(req);
			goto out;
		}
		sprintf(req,
			"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
			rpc_user, work->job_id, xnonce2str, ntimestr, noncestr);
		free(xnonce2str);

		rc = stratum_send_line(&stratum, req);
		free(req);
		if (unlikely(!rc)) {
			applog(LOG_ERR, "submit_upstream_work stratum_send_line failed");
			goto out;
		}
	} else if (work->txs) {
		char *req;

		for (i = 0; i < ARRAY_SIZE(work->data); i++)
			be32enc(work->data + i, work->data[i]);
		/* only the 80-byte block header is submitted */
		bin2hex(data_str, (unsigned char *)work->data, 80);
		if (work->workid) {
			char *params;
			val = json_object();
			json_object_set_new(val, "workid", json_string(work->workid));
			params = json_dumps(val, 0);
			json_decref(val);
			if (unlikely(!params)) {
				applog(LOG_ERR, "submit_upstream_work json_dumps failed");
				goto out;
			}
			req = malloc(128 + 2*80 + strlen(work->txs) + strlen(params));
			if (unlikely(!req)) {
				applog(LOG_ERR, "submit_upstream_work out of memory");
				free(params);
				goto out;
			}
			sprintf(req,
				"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n",
				data_str, work->txs, params);
			free(params);
		} else {
			req = malloc(128 + 2*80 + strlen(work->txs));
			if (unlikely(!req)) {
				applog(LOG_ERR, "submit_upstream_work out of memory");
				goto out;
			}
			sprintf(req,
				"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n",
				data_str, work->txs);
		}
		val = json_rpc_call(curl, rpc_url, rpc_userpass, req, NULL, 0);
		free(req);
		if (unlikely(!val)) {
			applog(LOG_ERR, "submit_upstream_work json_rpc_call failed");
			goto out;
		}

		res = json_object_get(val, "result");
		if (json_is_object(res)) {
			/* object result: accepted if any member is null */
			char *res_str;
			bool sumres = false;
			void *iter = json_object_iter(res);
			while (iter) {
				if (json_is_null(json_object_iter_value(iter))) {
					sumres = true;
					break;
				}
				iter = json_object_iter_next(res, iter);
			}
			res_str = json_dumps(res, 0);
			share_result(sumres, res_str);
			free(res_str);
		} else
			share_result(json_is_null(res), json_string_value(res));

		json_decref(val);
	} else {
		/* build hex string */
		for (i = 0; i < ARRAY_SIZE(work->data); i++)
			le32enc(work->data + i, work->data[i]);
		bin2hex(data_str, (unsigned char *)work->data, sizeof(work->data));

		/* build JSON-RPC request */
		sprintf(s,
			"{\"method\": \"getwork\", \"params\": [ \"%s\" ], \"id\":1}\r\n",
			data_str);

		/* issue JSON-RPC request */
		val = json_rpc_call(curl, rpc_url, rpc_userpass, s, NULL, 0);
		if (unlikely(!val)) {
			applog(LOG_ERR, "submit_upstream_work json_rpc_call failed");
			goto out;
		}

		res = json_object_get(val, "result");
		reason = json_object_get(val, "reject-reason");
		share_result(json_is_true(res), reason ? json_string_value(reason) : NULL);

		json_decref(val);
	}

	rc = true;

out:
	return rc;
}
static const char *getwork_req =
"{\"method\": \"getwork\", \"params\": [], \"id\":0}\r\n";
#define GBT_CAPABILITIES "[\"coinbasetxn\", \"coinbasevalue\", \"longpoll\", \"workid\"]"
#define GBT_RULES "[\"segwit\"]"
static const char *gbt_req =
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
GBT_CAPABILITIES ", \"rules\": " GBT_RULES "}], \"id\":0}\r\n";
static const char *gbt_lp_req =
"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
GBT_CAPABILITIES ", \"rules\": " GBT_RULES ", \"longpollid\": \"%s\"}], \"id\":0}\r\n";
/*
 * Request a unit of work from the upstream server and decode it into *work.
 *
 * Sends a getblocktemplate request while have_gbt is set, otherwise a
 * getwork request, and passes the "result" member of the reply to the
 * matching decoder.
 *
 * curl: handle used for the JSON-RPC call.
 * work: destination for the decoded work.
 *
 * Returns true when work was decoded, and also (without decoding anything)
 * when have_stratum is set after the call. Returns false when no protocol is
 * usable, when the call produced no reply, or when decoding failed.
 */
static bool get_upstream_work(CURL *curl, struct work *work)
{
	json_t *val;
	int err;
	bool rc;
	struct timeval tv_start, tv_end, diff;

start:
	gettimeofday(&tv_start, NULL);
	val = json_rpc_call(curl, rpc_url, rpc_userpass,
	                    have_gbt ? gbt_req : getwork_req,
	                    &err, have_gbt ? JSON_RPC_QUIET_404 : 0);
	gettimeofday(&tv_end, NULL);

	/* once stratum is active the reply is discarded unused */
	if (have_stratum) {
		if (val)
			json_decref(val);
		return true;
	}

	if (!have_gbt && !allow_getwork) {
		applog(LOG_ERR, "No usable protocol");
		if (val)
			json_decref(val);
		return false;
	}

	/* the transfer itself succeeded but yielded no reply: retry with getwork */
	if (have_gbt && allow_getwork && !val && err == CURLE_OK) {
		applog(LOG_INFO, "getblocktemplate failed, falling back to getwork");
		have_gbt = false;
		goto start;
	}

	if (!val)
		return false;

	if (have_gbt) {
		rc = gbt_work_decode(json_object_get(val, "result"), work);
		/* have_gbt was cleared while decoding: start over with getwork */
		if (!have_gbt) {
			json_decref(val);
			goto start;
		}
	} else
		rc = work_decode(json_object_get(val, "result"), work);

	if (opt_debug && rc) {
		timeval_subtract(&diff, &tv_end, &tv_start);
		/* the timeval fields are wider than int on many platforms, so the
		 * value is converted explicitly to match the %d conversion */
		applog(LOG_DEBUG, "DEBUG: got new work in %d ms",
		       (int)(diff.tv_sec * 1000 + diff.tv_usec / 1000));
	}

	json_decref(val);
	return rc;
}
/*
 * Release a workio command and whatever it owns.
 *
 * A NULL command is ignored. For WC_SUBMIT_WORK the attached work unit is
 * released first; other command types carry nothing to free. The command
 * structure is zeroed before being freed.
 */
static void workio_cmd_free(struct workio_cmd *wc)
{
	if (wc == NULL)
		return;

	if (wc->cmd == WC_SUBMIT_WORK) {
		work_free(wc->u.work);
		free(wc->u.work);
	}

	/* poison the structure so stale pointers are easier to spot */
	memset(wc, 0, sizeof(*wc));
	free(wc);
}
/*
 * Serve a WC_GET_WORK command: fetch work upstream and hand it to the
 * requesting thread through that thread's queue.
 *
 * wc:   the command; wc->thr identifies the requesting thread.
 * curl: handle used for the upstream request.
 *
 * Retries failed fetches every opt_fail_pause seconds. When opt_retries is
 * non-negative and more than that many attempts have failed, gives up and
 * returns false. Returns true otherwise, including when the finished work
 * could not be queued (it is released in that case).
 */
static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
{
	struct work *ret_work;
	int failures = 0;

	ret_work = calloc(1, sizeof(*ret_work));
	if (!ret_work)
		return false;

	/* obtain new work from bitcoin via JSON-RPC */
	while (!get_upstream_work(curl, ret_work)) {
		if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) {
			applog(LOG_ERR, "json_rpc_call failed, terminating workio thread");
			/* release buffers a partial decode may have attached */
			work_free(ret_work);
			free(ret_work);
			return false;
		}

		/* pause, then restart work-request loop */
		applog(LOG_ERR, "json_rpc_call failed, retry after %d seconds",
		       opt_fail_pause);
		sleep(opt_fail_pause);
	}

	/* send work to requesting thread; on failure the work is still ours,
	 * so release its buffers as well as the structure itself */
	if (!tq_push(wc->thr->q, ret_work)) {
		work_free(ret_work);
		free(ret_work);
	}

	return true;
}
/*
 * Serve a WC_SUBMIT_WORK command: push the solved work upstream.
 *
 * Keeps retrying every opt_fail_pause seconds. Returns true once the
 * submission goes through, or false when opt_retries is non-negative and
 * more than that many attempts have failed.
 */
static bool workio_submit_work(struct workio_cmd *wc, CURL *curl)
{
	int failed_attempts = 0;

	for (;;) {
		/* submit solution to bitcoin via JSON-RPC */
		if (submit_upstream_work(curl, wc->u.work))
			return true;

		if (unlikely((opt_retries >= 0) && (++failed_attempts > opt_retries))) {
			applog(LOG_ERR, "...terminating workio thread");
			return false;
		}

		/* wait before the next attempt */
		applog(LOG_ERR, "...retry after %d seconds",
		       opt_fail_pause);
		sleep(opt_fail_pause);
	}
}
/*
 * Work I/O thread: pops workio commands from its own queue and executes
 * them with a private CURL handle.
 *
 * The loop stops when the queue yields no command or when a command handler
 * reports failure. The queue is then frozen and the CURL handle released.
 * Always returns NULL.
 */
static void *workio_thread(void *userdata)
{
	struct thr_info *mythr = userdata;
	CURL *curl = curl_easy_init();

	if (unlikely(!curl)) {
		applog(LOG_ERR, "CURL initialization failed");
		return NULL;
	}

	for (;;) {
		struct workio_cmd *cmd;
		bool handled;

		/* block until a command arrives on our queue */
		cmd = tq_pop(mythr->q, NULL);
		if (!cmd)
			break;

		/* dispatch on the command type */
		if (cmd->cmd == WC_GET_WORK)
			handled = workio_get_work(cmd, curl);
		else if (cmd->cmd == WC_SUBMIT_WORK)
			handled = workio_submit_work(cmd, curl);
		else
			handled = false;	/* should never happen */

		workio_cmd_free(cmd);

		if (!handled)
			break;
	}

	tq_freeze(mythr->q);
	curl_easy_cleanup(curl);

	return NULL;
}
/*
 * Obtain a unit of work for a miner thread.
 *
 * In benchmark mode a synthetic header with an all-zero target is written
 * into *work and no request is made. Otherwise a WC_GET_WORK command is
 * queued for the workio thread and the reply popped from thr's queue is
 * copied into *work.
 *
 * Returns true on success, false if the command could not be allocated or
 * queued, or if no reply was received.
 */
static bool get_work(struct thr_info *thr, struct work *work)
{
	struct workio_cmd *request;
	struct work *reply;

	if (opt_benchmark) {
		memset(work->data, 0x55, 76);
		work->data[17] = swab32(time(NULL));
		memset(work->data + 19, 0x00, 52);
		work->data[20] = 0x80000000;
		work->data[31] = 0x00000280;
		memset(work->target, 0x00, sizeof(work->target));
		return true;
	}

	/* build the request message */
	request = calloc(1, sizeof(*request));
	if (request == NULL)
		return false;
	request->cmd = WC_GET_WORK;
	request->thr = thr;

	/* hand it to the workio thread */
	if (!tq_push(thr_info[work_thr_id].q, request)) {
		workio_cmd_free(request);
		return false;
	}

	/* block until the workio thread answers with a unit of work */
	reply = tq_pop(thr->q, NULL);
	if (reply == NULL)
		return false;

	/* copy the reply into the caller's storage and drop the heap copy */
	memcpy(work, reply, sizeof(*work));
	free(reply);

	return true;
}
/*
 * Queue a copy of a solved work unit for submission by the workio thread.
 *
 * Returns true when the command was queued, false when an allocation failed
 * or the queue refused the command. On failure everything allocated here is
 * released.
 */
static bool submit_work(struct thr_info *thr, const struct work *work_in)
{
	struct workio_cmd *cmd = calloc(1, sizeof(*cmd));

	if (cmd == NULL)
		return false;

	cmd->u.work = malloc(sizeof(*work_in));
	if (cmd->u.work != NULL) {
		/* fill out the submit message */
		cmd->cmd = WC_SUBMIT_WORK;
		cmd->thr = thr;
		work_copy(cmd->u.work, work_in);

		/* send solution to workio thread */
		if (tq_push(thr_info[work_thr_id].q, cmd))
			return true;
	}

	workio_cmd_free(cmd);
	return false;
}
/*
 * Build a work unit from the current stratum job.
 *
 * Under sctx->work_lock: copies the job id and the current extranonce2 into
 * *work, derives the merkle root from the coinbase and the job's merkle
 * branches, advances the job's extranonce2 for the next caller, and
 * assembles the header words in work->data. After the lock is released, the
 * target is computed from the job difficulty (divided by 65536 for scrypt).
 */
static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
{
	unsigned char root[64];
	int i;

	pthread_mutex_lock(&sctx->work_lock);

	free(work->job_id);
	work->job_id = strdup(sctx->job.job_id);
	work->xnonce2_len = sctx->xnonce2_size;
	work->xnonce2 = realloc(work->xnonce2, sctx->xnonce2_size);
	memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size);

	/* merkle root: hash the coinbase, then fold in each branch */
	sha256d(root, sctx->job.coinbase, sctx->job.coinbase_size);
	for (i = 0; i < sctx->job.merkle_count; i++) {
		memcpy(root + 32, sctx->job.merkle[i], 32);
		sha256d(root, root, 64);
	}

	/* bump extranonce2, carrying into the next byte on wrap-around */
	for (i = 0; i < sctx->xnonce2_size; i++) {
		if (++sctx->job.xnonce2[i] != 0)
			break;
	}

	/* block header */
	memset(work->data, 0, 128);
	work->data[0] = le32dec(sctx->job.version);
	for (i = 0; i < 8; i++) {
		work->data[1 + i] = le32dec((uint32_t *)sctx->job.prevhash + i);
		work->data[9 + i] = be32dec((uint32_t *)root + i);
	}
	work->data[17] = le32dec(sctx->job.ntime);
	work->data[18] = le32dec(sctx->job.nbits);
	work->data[20] = 0x80000000;
	work->data[31] = 0x00000280;

	pthread_mutex_unlock(&sctx->work_lock);

	if (opt_debug) {
		char *xnonce2_hex = abin2hex(work->xnonce2, work->xnonce2_len);
		applog(LOG_DEBUG, "DEBUG: job_id='%s' extranonce2=%s ntime=%08x",
		       work->job_id, xnonce2_hex, swab32(work->data[17]));
		free(xnonce2_hex);
	}

	diff_to_target(work->target,
	               opt_algo == ALGO_SCRYPT ? sctx->job.diff / 65536.0
	                                       : sctx->job.diff);
}
/*
 * Mining worker thread.
 *
 * userdata is this thread's struct thr_info. The thread loops: it refreshes
 * the shared g_work when required, keeps a private copy in `work`, scans a
 * range of nonces with the configured algorithm, records its hashrate and
 * submits the work when the scan reports a hit. The loop ends when work
 * retrieval or submission fails, or on an unknown algorithm; the thread's
 * queue is then frozen. Always returns NULL.
 */
static void *miner_thread(void *userdata)
{
struct thr_info *mythr = userdata;
int thr_id = mythr->id;
struct work work = {{0}};
uint32_t max_nonce;
/* upper end of this thread's equal share of the 32-bit nonce space, less 0x20 */
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
unsigned char *scratchbuf = NULL;
char s[16];
int i;
/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
 * and if that fails, then SCHED_BATCH. No need for this to be an
 * error if it fails */
if (!opt_benchmark) {
setpriority(PRIO_PROCESS, 0, 19);
drop_policy();
}
/* Cpu affinity only makes sense if the number of threads is a multiple
 * of the number of CPUs */
if (num_processors > 1 && opt_n_threads % num_processors == 0) {
if (!opt_quiet)
applog(LOG_INFO, "Binding thread %d to cpu %d",
thr_id, thr_id % num_processors);
affine_to_cpu(thr_id, thr_id % num_processors);
}
/* scrypt needs a per-thread scratch buffer sized by opt_scrypt_n */
if (opt_algo == ALGO_SCRYPT) {
scratchbuf = scrypt_buffer_alloc(opt_scrypt_n);
if (!scratchbuf) {
applog(LOG_ERR, "scrypt buffer allocation failed");
/* NOTE(review): applog_lock is taken and never released before exit(),
 * presumably to keep other threads from logging during shutdown - confirm */
pthread_mutex_lock(&applog_lock);
exit(1);
}
}
while (1) {
unsigned long hashes_done;
struct timeval tv_start, tv_end, diff;
int64_t max64;
int rc;
if (have_stratum) {
/* wait, polling once a second, while the shared work is 120 s old or older */
while (time(NULL) >= g_work_time + 120)
sleep(1);
pthread_mutex_lock(&g_work_lock);
/* this thread's nonce range is used up on the current header: generate
 * fresh work from the stratum job */
if (work.data[19] >= end_nonce && !memcmp(work.data, g_work.data, 76))
stratum_gen_work(&stratum, &g_work);
} else {
int min_scantime = have_longpoll ? LP_SCANTIME : opt_scantime;
/* obtain new work from internal workio thread */
pthread_mutex_lock(&g_work_lock);
/* refresh when the shared work is min_scantime old or the nonce range is exhausted */
if (!have_stratum &&
(time(NULL) - g_work_time >= min_scantime ||
work.data[19] >= end_nonce)) {
work_free(&g_work);
if (unlikely(!get_work(mythr, &g_work))) {
applog(LOG_ERR, "work retrieval failed, exiting "
"mining thread %d", mythr->id);
pthread_mutex_unlock(&g_work_lock);
goto out;
}
g_work_time = have_stratum ? 0 : time(NULL);
}
/* have_stratum became set in the meantime: restart the loop on the stratum path */
if (have_stratum) {
pthread_mutex_unlock(&g_work_lock);
continue;
}
}
/* first 76 header bytes differ from the shared work: take a private copy
 * and start at this thread's first nonce; otherwise move to the next nonce */
if (memcmp(work.data, g_work.data, 76)) {
work_free(&work);
work_copy(&work, &g_work);
work.data[19] = 0xffffffffU / opt_n_threads * thr_id;
} else
work.data[19]++;
pthread_mutex_unlock(&g_work_lock);
work_restart[thr_id].restart = 0;
/* adjust max_nonce to meet target scan time */
if (have_stratum)
max64 = LP_SCANTIME;
else
max64 = g_work_time + (have_longpoll ? LP_SCANTIME : opt_scantime)
- time(NULL);
/* seconds available times the last measured hashrate gives the hash budget */
max64 *= thr_hashrates[thr_id];
/* no positive budget (e.g. hashrate still zero): use a fixed per-algorithm count */
if (max64 <= 0) {
switch (opt_algo) {
case ALGO_SCRYPT:
max64 = opt_scrypt_n < 16 ? 0x3ffff : 0x3fffff / opt_scrypt_n;
break;
case ALGO_SHA256D:
max64 = 0x1fffff;
break;
}
}
/* never scan past the end of this thread's nonce range */
if (work.data[19] + max64 > end_nonce)
max_nonce = end_nonce;
else
max_nonce = work.data[19] + max64;
hashes_done = 0;
gettimeofday(&tv_start, NULL);
/* scan nonces for a proof-of-work hash */
switch (opt_algo) {
case ALGO_SCRYPT:
rc = scanhash_scrypt(thr_id, work.data, scratchbuf, work.target,
max_nonce, &hashes_done, opt_scrypt_n);
break;
case ALGO_SHA256D:
rc = scanhash_sha256d(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
default:
/* should never happen */
goto out;
}
/* record scanhash elapsed time */
gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start);
/* skip the update when no time elapsed, which would divide by zero */
if (diff.tv_usec || diff.tv_sec) {
pthread_mutex_lock(&stats_lock);
thr_hashrates[thr_id] =
hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
pthread_mutex_unlock(&stats_lock);
}
if (!opt_quiet) {
/* print khash/s: no decimals from 1e6 hash/s upwards, two decimals below */
sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f",
1e-3 * thr_hashrates[thr_id]);
applog(LOG_INFO, "thread %d: %lu hashes, %s khash/s",
thr_id, hashes_done, s);
}
/* in benchmark mode the last thread reports the total, but only once every
 * thread has a non-zero hashrate */
if (opt_benchmark && thr_id == opt_n_threads - 1) {
double hashrate = 0.;
for (i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
hashrate += thr_hashrates[i];
if (i == opt_n_threads) {
sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * hashrate);
applog(LOG_INFO, "Total: %s khash/s", s);
}
}
/* if nonce found, submit work */
if (rc && !opt_benchmark && !submit_work(mythr, &work))
break;
}
out:
tq_freeze(mythr->q);
return NULL;
}
/* Raise the restart flag of every miner thread. */
static void restart_threads(void)
{
	int t = 0;

	while (t < opt_n_threads) {
		work_restart[t].restart = 1;
		t++;
	}
}
/*
 * Long-polling thread.
 *
 * userdata is this thread's struct thr_info. The thread pops a long-poll
 * path or URL from its queue, resolves it against rpc_url when it is not a
 * full URL, and then issues long-poll requests in a loop. Each reply is
 * decoded into the shared g_work and the miner threads are told to restart.
 *
 * On a timeout the miners are restarted and polling continues. On any other
 * failure long polling is marked unavailable, the thread sleeps for
 * opt_fail_pause seconds and waits for a new path on its queue.
 *
 * The thread ends when CURL cannot be initialised, when the queue yields
 * nothing, when an allocation fails, or when have_stratum is set after a
 * request. Always returns NULL.
 */
static void *longpoll_thread(void *userdata)
{
	struct thr_info *mythr = userdata;
	CURL *curl = NULL;
	char *copy_start, *hdr_path = NULL, *lp_url = NULL;
	bool need_slash = false;

	curl = curl_easy_init();
	if (unlikely(!curl)) {
		applog(LOG_ERR, "CURL initialization failed");
		goto out;
	}

start:
	hdr_path = tq_pop(mythr->q, NULL);
	if (!hdr_path)
		goto out;

	/* full URL */
	if (strstr(hdr_path, "://")) {
		lp_url = hdr_path;
		hdr_path = NULL;
	}
	/* absolute path, on current server */
	else {
		copy_start = (*hdr_path == '/') ? (hdr_path + 1) : hdr_path;
		if (rpc_url[strlen(rpc_url) - 1] != '/')
			need_slash = true;

		/* +2: optional '/' separator and the terminating NUL */
		lp_url = malloc(strlen(rpc_url) + strlen(copy_start) + 2);
		if (!lp_url)
			goto out;

		sprintf(lp_url, "%s%s%s", rpc_url, need_slash ? "/" : "", copy_start);
	}

	applog(LOG_INFO, "Long-polling activated for %s", lp_url);

	while (1) {
		json_t *val, *res, *soval;
		char *req = NULL;
		int err;

		if (have_gbt) {
			/* the template's "%s" is replaced by lp_id, so this size
			 * always leaves room for the terminating NUL */
			req = malloc(strlen(gbt_lp_req) + strlen(lp_id) + 1);
			if (!req) {
				/* same policy as the lp_url allocation above */
				applog(LOG_ERR, "Long-poll request allocation failed");
				goto out;
			}
			sprintf(req, gbt_lp_req, lp_id);
		}
		val = json_rpc_call(curl, lp_url, rpc_userpass,
		                    req ? req : getwork_req, &err,
		                    JSON_RPC_LONGPOLL);
		free(req);

		/* stratum has taken over: long polling is no longer needed */
		if (have_stratum) {
			if (val)
				json_decref(val);
			goto out;
		}

		if (likely(val)) {
			bool rc;
			applog(LOG_INFO, "LONGPOLL pushed new work");
			res = json_object_get(val, "result");
			soval = json_object_get(res, "submitold");
			submit_old = soval ? json_is_true(soval) : false;
			pthread_mutex_lock(&g_work_lock);
			work_free(&g_work);
			if (have_gbt)
				rc = gbt_work_decode(res, &g_work);
			else
				rc = work_decode(res, &g_work);
			if (rc) {
				time(&g_work_time);
				restart_threads();
			}
			pthread_mutex_unlock(&g_work_lock);
			json_decref(val);
		} else {
			/* age the shared work by LP_SCANTIME */
			pthread_mutex_lock(&g_work_lock);
			g_work_time -= LP_SCANTIME;
			pthread_mutex_unlock(&g_work_lock);
			if (err == CURLE_OPERATION_TIMEDOUT) {
				restart_threads();
			} else {
				have_longpoll = false;
				restart_threads();
				free(hdr_path);
				hdr_path = NULL;
				free(lp_url);
				lp_url = NULL;
				sleep(opt_fail_pause);
				goto start;
			}
		}
	}

out:
	free(hdr_path);
	free(lp_url);
	tq_freeze(mythr->q);
	if (curl)
		curl_easy_cleanup(curl);

	return NULL;
}
static bool stratum_handle_response(char *buf)
{
json_t *val, *err_val, *res_val, *id_val;
json_error_t err;
bool ret = false;
val = JSON_LOADS(buf, &err);
if (!val) {
applog(LOG_INFO, "JSON decode failed(%d): %s", err.line, err.text);
goto out;
}
res_val = json_object_get(val, "result");
err_val = json_object_get(val, "error");
id_val = json_object_get(val, "id");
if (!id_val || json_is_null(id_val) || !res_val)
goto out;
share_result(json_is_true(res_val),
err_val ? json_string_value(json_array_get(err_val, 1)) : NULL);
ret = true;
out:
if (val)
json_decref(val);
return ret;
}
/*
 * Stratum client thread.
 *
 * userdata is this thread's struct thr_info. The server URL is popped from
 * the thread's queue. The thread then keeps the connection alive:
 * connecting, subscribing and authorising whenever there is no connection,
 * regenerating the shared work when the job changes, and dispatching each
 * received line to the method handler or, failing that, the response
 * handler.
 *
 * When opt_retries is non-negative and more than that many consecutive
 * connection attempts have failed, a NULL command is pushed to the workio
 * thread and this thread ends. Always returns NULL.
 */
static void *stratum_thread(void *userdata)
{
	struct thr_info *mythr = userdata;
	char *line;

	stratum.url = tq_pop(mythr->q, NULL);
	if (!stratum.url)
		return NULL;
	applog(LOG_INFO, "Starting Stratum on %s", stratum.url);

	for (;;) {
		int failures = 0;

		/* (re)establish the session */
		while (!stratum.curl) {
			bool session_up;

			pthread_mutex_lock(&g_work_lock);
			g_work_time = 0;
			pthread_mutex_unlock(&g_work_lock);
			restart_threads();

			session_up = stratum_connect(&stratum, stratum.url) &&
			             stratum_subscribe(&stratum) &&
			             stratum_authorize(&stratum, rpc_user, rpc_pass);
			if (session_up)
				continue;

			stratum_disconnect(&stratum);
			if (opt_retries >= 0 && ++failures > opt_retries) {
				applog(LOG_ERR, "...terminating workio thread");
				tq_push(thr_info[work_thr_id].q, NULL);
				return NULL;
			}
			applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
			sleep(opt_fail_pause);
		}

		/* a job exists and the shared work is unset or belongs to another job */
		if (stratum.job.job_id &&
		    (!g_work_time || strcmp(stratum.job.job_id, g_work.job_id))) {
			pthread_mutex_lock(&g_work_lock);
			stratum_gen_work(&stratum, &g_work);
			time(&g_work_time);
			pthread_mutex_unlock(&g_work_lock);
			if (stratum.job.clean) {
				applog(LOG_INFO, "Stratum requested work restart");
				restart_threads();
			}
		}

		if (stratum_socket_full(&stratum, 120)) {
			line = stratum_recv_line(&stratum);
		} else {
			applog(LOG_ERR, "Stratum connection timed out");
			line = NULL;
		}

		if (!line) {
			stratum_disconnect(&stratum);
			applog(LOG_ERR, "Stratum connection interrupted");
			continue;
		}

		/* not a server-initiated method: treat it as a reply */
		if (!stratum_handle_method(&stratum, line))
			stratum_handle_response(line);
		free(line);
	}
}
/*
 * Print the package string, the build date, the list of compiled-in feature
 * tags, the libcurl version and (when JANSSON_VERSION is defined) the
 * jansson version, then exit with status 0.
 *
 * The feature list is assembled at compile time: each preprocessor
 * conditional below contributes one string literal to the single format
 * string passed to the first printf().
 */
static void show_version_and_exit(void)
{
printf(PACKAGE_STRING "\n built on " __DATE__ "\n features:"
#if defined(USE_ASM) && defined(__i386__)
" i386"
#endif
#if defined(USE_ASM) && defined(__x86_64__)
" x86_64"
" PHE"
#endif
#if defined(USE_ASM) && (defined(__i386__) || defined(__x86_64__))
" SSE2"
#endif
#if defined(__x86_64__) && defined(USE_AVX)
" AVX"
#endif
#if defined(__x86_64__) && defined(USE_AVX2)
" AVX2"
#endif
#if defined(__x86_64__) && defined(USE_XOP)
" XOP"
#endif
#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
" ARM"
#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_6__) || \
defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6T2__) || \
defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || \
defined(__ARM_ARCH_7__) || \
defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
" ARMv5E"
#endif
#if defined(__ARM_NEON__)
" NEON"
#endif
#endif
#if defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
" PowerPC"
#if defined(__ALTIVEC__)
" AltiVec"
#endif
#endif
"\n");
/* versions of the libraries in use */
printf("%s\n", curl_version());
#ifdef JANSSON_VERSION
printf("libjansson %s\n", JANSSON_VERSION);
#endif
exit(0);
}
/*
 * Print usage information and terminate the process.
 *
 * status: exit status. A non-zero status prints a one-line hint to stderr;
 *         zero prints the full usage text to stdout.
 *
 * Does not return.
 */
static void show_usage_and_exit(int status)
{
	if (status)
		fprintf(stderr, "Try `" PROGRAM_NAME " --help' for more information.\n");
	else
		/* the usage text is data, not a format: emit it verbatim */
		fputs(usage, stdout);
	exit(status);
}
/*
 * Scrub a string in place: a non-empty string becomes "x" and every
 * following byte up to the original terminator is set to NUL. An empty
 * string is left unchanged.
 */
static void strhide(char *s)
{
	char *cur = s;

	if (*cur != '\0') {
		*cur = 'x';
		cur++;
	}
	for (; *cur != '\0'; cur++)
		*cur = '\0';
}
static void parse_config(json_t *config, char *pname, char *ref);
/*
 * Apply a single option to the global configuration.
 *
 * key:   option identifier (a short-option character or a numeric code).
 * arg:   option argument. For password-bearing options the buffer is
 *        modified in place so the secret no longer appears in it.
 * pname: program name used as the prefix of error messages.
 *
 * Invalid values print a diagnostic and/or the usage hint and terminate the
 * process; unknown keys terminate through show_usage_and_exit(1).
 */
static void parse_arg(int key, char *arg, char *pname)
{
char *p;
int v, i;
switch(key) {
case 'a':
/* accept an exact algorithm name, or "<scrypt name>:N" where N is a power
 * of two and at least 2 */
for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
v = strlen(algo_names[i]);
if (!strncmp(arg, algo_names[i], v)) {
if (arg[v] == '\0') {
opt_algo = i;
break;
}
if (arg[v] == ':' && i == ALGO_SCRYPT) {
char *ep;
v = strtol(arg+v+1, &ep, 10);
/* reject trailing characters, non-powers of two and values below 2 */
if (*ep || v & (v-1) || v < 2)
continue;
opt_algo = i;
opt_scrypt_n = v;
break;
}
}
}
/* the loop ran to completion: nothing matched */
if (i == ARRAY_SIZE(algo_names)) {
fprintf(stderr, "%s: unknown algorithm -- '%s'\n",
pname, arg);
show_usage_and_exit(1);
}
break;
case 'B':
opt_background = true;
break;
case 'c': {
/* load a JSON configuration file and apply each option it contains */
json_error_t err;
json_t *config = JSON_LOAD_FILE(arg, &err);
if (!json_is_object(config)) {
if (err.line < 0)
fprintf(stderr, "%s: %s\n", pname, err.text);
else
fprintf(stderr, "%s: %s:%d: %s\n",
pname, arg, err.line, err.text);
exit(1);
}
parse_config(config, pname, arg);
json_decref(config);
break;
}
case 'q':
opt_quiet = true;
break;
case 'D':
opt_debug = true;
break;
case 'p':
/* keep a private copy of the password, then scrub the argument buffer */
free(rpc_pass);
rpc_pass = strdup(arg);
strhide(arg);
break;
case 'P':
opt_protocol = true;
break;
case 'r':
v = atoi(arg);
if (v < -1 || v > 9999) /* sanity check */
show_usage_and_exit(1);
opt_retries = v;
break;
case 'R':
v = atoi(arg);
if (v < 1 || v > 9999) /* sanity check */
show_usage_and_exit(1);
opt_fail_pause = v;
break;
case 's':
v = atoi(arg);
if (v < 1 || v > 9999) /* sanity check */
show_usage_and_exit(1);
opt_scantime = v;
break;
case 'T':
v = atoi(arg);
if (v < 1 || v > 99999) /* sanity check */
show_usage_and_exit(1);
opt_timeout = v;
break;
case 't':
v = atoi(arg);
if (v < 1 || v > 9999) /* sanity check */
show_usage_and_exit(1);
opt_n_threads = v;
break;
case 'u':
free(rpc_user);
rpc_user = strdup(arg);
break;
case 'o': { /* --url */
/* ap: start of the authority part (after "://" if present)
 * hp: start of the host part (after the last '@' if present) */
char *ap, *hp;
ap = strstr(arg, "://");
ap = ap ? ap + 3 : arg;
hp = strrchr(arg, '@');
if (hp) {
/* temporarily terminate the credentials at the '@' */
*hp = '\0';
p = strchr(ap, ':');
if (p) {
/* "user:pass@host": record the pair, the user and the password */
free(rpc_userpass);
rpc_userpass = strdup(ap);
free(rpc_user);
rpc_user = calloc(p - ap + 1, 1);
strncpy(rpc_user, ap, p - ap);
free(rpc_pass);
rpc_pass = strdup(++p);
/* mask the password inside arg: keep a single 'x', slide the host part
 * (including its NUL) down behind it, and zero the bytes left over */
if (*p) *p++ = 'x';
v = strlen(hp + 1) + 1;
memmove(p + 1, hp + 1, v);
memset(p + v, 0, hp - p);
hp = p;
} else {
/* "user@host": only a user name was given */
free(rpc_user);
rpc_user = strdup(ap);
}
/* restore the '@' and leave hp on the first character of the host */
*hp++ = '@';
} else
hp = ap;
if (ap != arg) {
/* a scheme was given: it must be one of the supported ones */
if (strncasecmp(arg, "http://", 7) &&
strncasecmp(arg, "https://", 8) &&
strncasecmp(arg, "stratum+tcp://", 14) &&
strncasecmp(arg, "stratum+tcps://", 15)) {
fprintf(stderr, "%s: unknown protocol -- '%s'\n",
pname, arg);
show_usage_and_exit(1);
}
/* store the URL without credentials: scheme followed by the host part */
free(rpc_url);
rpc_url = strdup(arg);
strcpy(rpc_url + (ap - arg), hp);
} else {
/* no scheme: require a non-empty host and default to http:// */
if (*hp == '\0' || *hp == '/') {
fprintf(stderr, "%s: invalid URL -- '%s'\n",
pname, arg);
show_usage_and_exit(1);
}
free(rpc_url);
rpc_url = malloc(strlen(hp) + 8);
sprintf(rpc_url, "http://%s", hp);
}
have_stratum = !opt_benchmark && !strncasecmp(rpc_url, "stratum", 7);
break;
}
case 'O': /* --userpass */
p = strchr(arg, ':');
if (!p) {
fprintf(stderr, "%s: invalid username:password pair -- '%s'\n",
pname, arg);
show_usage_and_exit(1);
}
free(rpc_userpass);
rpc_userpass = strdup(arg);
free(rpc_user);
rpc_user = calloc(p - arg + 1, 1);
strncpy(rpc_user, arg, p - arg);
free(rpc_pass);
rpc_pass = strdup(++p);
/* scrub the password portion of the argument buffer */
strhide(p);
break;
case 'x': /* --proxy */
/* choose the proxy type from the URL scheme; anything else is treated as HTTP */
if (!strncasecmp(arg, "socks4://", 9))
opt_proxy_type = CURLPROXY_SOCKS4;
else if (!strncasecmp(arg, "socks5://", 9))
opt_proxy_type = CURLPROXY_SOCKS5;
#if LIBCURL_VERSION_NUM >= 0x071200
else if (!strncasecmp(arg, "socks4a://", 10))
opt_proxy_type = CURLPROXY_SOCKS4A;
else if (!strncasecmp(arg, "socks5h://", 10))
opt_proxy_type = CURLPROXY_SOCKS5_HOSTNAME;
#endif
else
opt_proxy_type = CURLPROXY_HTTP;
free(opt_proxy);
opt_proxy = strdup(arg);
break;
case 1001:
/* certificate argument */
free(opt_cert);
opt_cert = strdup(arg);
break;
case 1005:
/* benchmark mode: also switches off long polling and stratum */
opt_benchmark = true;
want_longpoll = false;
want_stratum = false;
have_stratum = false;
break;
case 1003:
want_longpoll = false;
break;
case 1007:
want_stratum = false;
break;
case 1009:
opt_redirect = false;
break;
case 1010:
allow_getwork = false;
break;
case 1011:
have_gbt = false;
break;
case 1013: /* --coinbase-addr */
pk_script_size = address_to_script(pk_script, sizeof(pk_script), arg);
if (!pk_script_size) {
fprintf(stderr, "%s: invalid address -- '%s'\n",
pname, arg);
show_usage_and_exit(1);
}
break;
case 1015: /* --coinbase-sig */
/* the length check makes the strcpy() below safe */
if (strlen(arg) + 1 > sizeof(coinbase_sig)) {
fprintf(stderr, "%s: coinbase signature too long\n", pname);
show_usage_and_exit(1);
}
strcpy(coinbase_sig, arg);
break;
case 'S':
use_syslog = true;
break;
/* the remaining cases need no break: each callee ends in exit() */
case 'V':
show_version_and_exit();
case 'h':
show_usage_and_exit(0);
default:
show_usage_and_exit(1);
}
}
/*
 * Apply every recognised option found in a JSON configuration object.
 *
 * config: configuration object to read.
 * pname:  program name used as the prefix of error messages.
 * ref:    name of the configuration source, used in error messages.
 *
 * Options that take an argument must be JSON strings; flag options must be
 * JSON true. Any other value, or a nested "config" option, prints an error
 * and exits with status 1.
 */
static void parse_config(json_t *config, char *pname, char *ref)
{
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(options) && options[idx].name; idx++) {
		json_t *entry = json_object_get(config, options[idx].name);

		if (!entry)
			continue;

		if (options[idx].has_arg && json_is_string(entry)) {
			char *copy;

			if (!strcmp(options[idx].name, "config")) {
				fprintf(stderr, "%s: %s: option '%s' not allowed here\n",
				        pname, ref, options[idx].name);
				exit(1);
			}
			/* parse_arg() may modify its argument, so hand it a copy */
			copy = strdup(json_string_value(entry));
			if (!copy)
				break;
			parse_arg(options[idx].val, copy, pname);
			free(copy);
		} else if (!options[idx].has_arg && json_is_true(entry)) {
			parse_arg(options[idx].val, "", pname);
		} else {
			fprintf(stderr, "%s: invalid argument for option '%s'\n",
			        pname, options[idx].name);
			exit(1);
		}
	}
}
/*
 * Parse the command line, applying each option through parse_arg().
 * A leftover non-option argument is reported and ends the program through
 * show_usage_and_exit(1).
 */
static void parse_cmdline(int argc, char *argv[])
{
	int opt;

	for (;;) {
#if HAVE_GETOPT_LONG
		opt = getopt_long(argc, argv, short_options, options, NULL);
#else
		opt = getopt(argc, argv, short_options);
#endif
		if (opt < 0)
			break;

		parse_arg(opt, optarg, argv[0]);
	}

	if (optind >= argc)
		return;

	fprintf(stderr, "%s: unsupported non-option argument -- '%s'\n",
	        argv[0], argv[optind]);
	show_usage_and_exit(1);
}
#ifndef WIN32
/*
 * Signal handler: SIGHUP is only logged; SIGINT and SIGTERM are logged and
 * then end the process with status 0. Other signals are ignored.
 */
static void signal_handler(int sig)
{
	if (sig == SIGHUP) {
		applog(LOG_INFO, "SIGHUP received");
	} else if (sig == SIGINT) {
		applog(LOG_INFO, "SIGINT received, exiting");
		exit(0);
	} else if (sig == SIGTERM) {
		applog(LOG_INFO, "SIGTERM received, exiting");
		exit(0);
	}
}
#endif
/*
 * Program entry point.
 *
 * Parses the command line, initialises the locks and libcurl, optionally
 * detaches into the background, determines the processor count, allocates
 * the per-thread tables and starts the workio, long-poll, stratum and miner
 * threads. It then blocks until the workio thread exits.
 *
 * Returns 0 after the workio thread has ended, 1 on any set-up failure.
 */
int main(int argc, char *argv[])
{
struct thr_info *thr;
long flags;
int i;
/* start with empty credentials so the pointers are always valid strings */
rpc_user = strdup("");
rpc_pass = strdup("");
/* parse command line */
parse_cmdline(argc, argv);
if (!opt_benchmark && !rpc_url) {
fprintf(stderr, "%s: no URL supplied\n", argv[0]);
show_usage_and_exit(1);
}
/* no combined pair was given: build "user:pass" from the separate parts */
if (!rpc_userpass) {
rpc_userpass = malloc(strlen(rpc_user) + strlen(rpc_pass) + 2);
if (!rpc_userpass)
return 1;
sprintf(rpc_userpass, "%s:%s", rpc_user, rpc_pass);
}
pthread_mutex_init(&applog_lock, NULL);
pthread_mutex_init(&stats_lock, NULL);
pthread_mutex_init(&g_work_lock, NULL);
pthread_mutex_init(&stratum.sock_lock, NULL);
pthread_mutex_init(&stratum.work_lock, NULL);
/* leave SSL out of libcurl's global init in benchmark mode or when the URL
 * is neither https:// nor stratum+tcps:// */
flags = opt_benchmark || (strncasecmp(rpc_url, "https://", 8) &&
strncasecmp(rpc_url, "stratum+tcps://", 15))
? (CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL)
: CURL_GLOBAL_ALL;
if (curl_global_init(flags)) {
applog(LOG_ERR, "CURL initialization failed");
return 1;
}
#ifndef WIN32
if (opt_background) {
/* fork: the parent exits with status 0, the child carries on in a new
 * session with "/" as working directory */
i = fork();
if (i < 0) exit(1);
if (i > 0) exit(0);
i = setsid();
if (i < 0)
applog(LOG_ERR, "setsid() failed (errno = %d)", errno);
i = chdir("/");
if (i < 0)
applog(LOG_ERR, "chdir() failed (errno = %d)", errno);
signal(SIGHUP, signal_handler);
signal(SIGINT, signal_handler);
signal(SIGTERM, signal_handler);
}
#endif
/* determine the processor count with whatever the platform offers */
#if defined(WIN32)
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
num_processors = sysinfo.dwNumberOfProcessors;
#elif defined(_SC_NPROCESSORS_CONF)
num_processors = sysconf(_SC_NPROCESSORS_CONF);
#elif defined(CTL_HW) && defined(HW_NCPU)
int req[] = { CTL_HW, HW_NCPU };
size_t len = sizeof(num_processors);
sysctl(req, 2, &num_processors, &len, NULL, 0);
#else
num_processors = 1;
#endif
if (num_processors < 1)
num_processors = 1;
/* default to one miner thread per processor */
if (!opt_n_threads)
opt_n_threads = num_processors;
#ifdef HAVE_SYSLOG_H
if (use_syslog)
openlog("cpuminer", LOG_PID, LOG_USER);
#endif
work_restart = calloc(opt_n_threads, sizeof(*work_restart));
if (!work_restart)
return 1;
/* miner slots come first; three more follow for the workio, long-poll and
 * stratum threads (indices opt_n_threads, +1 and +2 below) */
thr_info = calloc(opt_n_threads + 3, sizeof(*thr));
if (!thr_info)
return 1;
thr_hashrates = (double *) calloc(opt_n_threads, sizeof(double));
if (!thr_hashrates)
return 1;
/* init workio thread info */
work_thr_id = opt_n_threads;
thr = &thr_info[work_thr_id];
thr->id = work_thr_id;
thr->q = tq_new();
if (!thr->q)
return 1;
/* start work I/O thread */
if (pthread_create(&thr->pth, NULL, workio_thread, thr)) {
applog(LOG_ERR, "workio thread create failed");
return 1;
}
if (want_longpoll && !have_stratum) {
/* init longpoll thread info */
longpoll_thr_id = opt_n_threads + 1;
thr = &thr_info[longpoll_thr_id];
thr->id = longpoll_thr_id;
thr->q = tq_new();
if (!thr->q)
return 1;
/* start longpoll thread */
if (unlikely(pthread_create(&thr->pth, NULL, longpoll_thread, thr))) {
applog(LOG_ERR, "longpoll thread create failed");
return 1;
}
}
if (want_stratum) {
/* init stratum thread info */
stratum_thr_id = opt_n_threads + 2;
thr = &thr_info[stratum_thr_id];
thr->id = stratum_thr_id;
thr->q = tq_new();
if (!thr->q)
return 1;
/* start stratum thread */
if (unlikely(pthread_create(&thr->pth, NULL, stratum_thread, thr))) {
applog(LOG_ERR, "stratum thread create failed");
return 1;
}
/* a stratum URL was configured: pass a copy of it to the thread's queue */
if (have_stratum)
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
}
/* start mining threads */
for (i = 0; i < opt_n_threads; i++) {
thr = &thr_info[i];
thr->id = i;
thr->q = tq_new();
if (!thr->q)
return 1;
if (unlikely(pthread_create(&thr->pth, NULL, miner_thread, thr))) {
applog(LOG_ERR, "thread %d create failed", i);
return 1;
}
}
applog(LOG_INFO, "%d miner threads started, "
"using '%s' algorithm.",
opt_n_threads,
algo_names[opt_algo]);
/* main loop - simply wait for workio thread to exit */
pthread_join(thr_info[work_thr_id].pth, NULL);
applog(LOG_INFO, "workio thread dead, exiting.");
return 0;
}
0707010000001E000081A4000003E800000064000000015EF4BCA100001BD9000000000000000000000000000000000000001700000000cpuminer-2.5.1/elist.h#ifndef _LINUX_LIST_H
#define _LINUX_LIST_H
/*
* Simple doubly linked list implementation.
*
* Some of the internal functions ("__xxx") are useful when
* manipulating whole lists rather than single entries, as
* sometimes we already know the next/prev entries and we can
* generate better code by using them directly rather than
* using the generic single-entry routines.
*/
/* Link node embedded in every structure that is kept on a list. */
struct list_head {
	struct list_head *next;	/* following node */
	struct list_head *prev;	/* preceding node */
};
/* Initializer for an empty list head: both links point back at the head. */
#define LIST_HEAD_INIT(name) { &(name), &(name) }
/* Define a list head variable called `name`, initialised as an empty list. */
#define LIST_HEAD(name) \
struct list_head name = LIST_HEAD_INIT(name)
/* Reset an existing head (passed by pointer) to the empty state at run
 * time. Note that `ptr` is evaluated several times. */
#define INIT_LIST_HEAD(ptr) do { \
(ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)
/*
 * Link @entry in between @prev and @next, which must already be
 * neighbours.
 *
 * Internal helper for callers that already hold both neighbours.
 */
static inline void __list_add(struct list_head *entry,
			      struct list_head *prev,
			      struct list_head *next)
{
	next->prev = entry;
	entry->next = next;
	entry->prev = prev;
	prev->next = entry;
}
/**
 * list_add - insert an entry right after a head
 * @entry: entry to insert
 * @head: node the entry is placed after
 *
 * Inserting after the head gives last-in-first-out order, which suits
 * stacks.
 */
static inline void list_add(struct list_head *entry, struct list_head *head)
{
	struct list_head *first = head->next;

	__list_add(entry, head, first);
}
/**
 * list_add_tail - insert an entry right before a head
 * @entry: entry to insert
 * @head: node the entry is placed before
 *
 * Inserting before the head appends to the end of the list, which suits
 * queues.
 */
static inline void list_add_tail(struct list_head *entry, struct list_head *head)
{
	struct list_head *last = head->prev;

	__list_add(entry, last, head);
}
/*
 * Unlink whatever sits between @before and @after by making the two point
 * at each other.
 *
 * Internal helper for callers that already hold both neighbours.
 */
static inline void __list_del(struct list_head *before, struct list_head *after)
{
	after->prev = before;
	before->next = after;
}
/**
 * list_del - remove an entry from its list
 * @entry: the element to remove
 *
 * Both links of @entry are set to null afterwards, so list_empty() on the
 * removed entry does not report true.
 */
static inline void list_del(struct list_head *entry)
{
	struct list_head *before = entry->prev;
	struct list_head *after = entry->next;

	__list_del(before, after);
	entry->next = (void *) 0;
	entry->prev = (void *) 0;
}
/**
 * list_del_init - remove an entry from its list and make it an empty list
 * @entry: the element to remove
 */
static inline void list_del_init(struct list_head *entry)
{
	struct list_head *before = entry->prev;
	struct list_head *after = entry->next;

	__list_del(before, after);
	INIT_LIST_HEAD(entry);
}
/**
 * list_move - take an entry off its list and insert it after another head
 * @list: the entry to move
 * @head: the head that will precede the entry
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{
	struct list_head *before = list->prev;
	struct list_head *after = list->next;

	__list_del(before, after);
	list_add(list, head);
}
/**
 * list_move_tail - take an entry off its list and append it to another list
 * @list: the entry to move
 * @head: the head that will follow the entry
 */
static inline void list_move_tail(struct list_head *list,
				  struct list_head *head)
{
	struct list_head *before = list->prev;
	struct list_head *after = list->next;

	__list_del(before, after);
	list_add_tail(list, head);
}
/**
 * list_empty - report whether a list has no entries
 * @head: the list to test
 *
 * Returns non-zero when @head links back to itself.
 */
static inline int list_empty(struct list_head *head)
{
	struct list_head *first = head->next;

	return first == head;
}
/*
 * Insert all entries of @list directly after @head. @list itself is left
 * untouched and must not be empty (list_splice() checks that).
 */
static inline void __list_splice(struct list_head *list,
				 struct list_head *head)
{
	struct list_head *src_first = list->next;
	struct list_head *src_last = list->prev;
	struct list_head *dst_next = head->next;

	src_first->prev = head;
	head->next = src_first;

	src_last->next = dst_next;
	dst_next->prev = src_last;
}
/**
 * list_splice - join two lists
 * @list: the list whose entries are added
 * @head: the position in the other list after which they are inserted
 *
 * Does nothing when @list is empty.
 */
static inline void list_splice(struct list_head *list, struct list_head *head)
{
	if (list_empty(list))
		return;

	__list_splice(list, head);
}
/**
 * list_splice_init - join two lists and reset the emptied one
 * @list: the list whose entries are added
 * @head: the position in the other list after which they are inserted
 *
 * After the splice @list is reinitialised as an empty list. Does nothing
 * when @list is empty to begin with.
 */
static inline void list_splice_init(struct list_head *list,
				    struct list_head *head)
{
	if (list_empty(list))
		return;

	__list_splice(list, head);
	INIT_LIST_HEAD(list);
}
/**
 * list_entry - get the struct for this entry
 * @ptr: the &struct list_head pointer.
 * @type: the type of the struct this is embedded in.
 * @member: the name of the list_struct within the struct.
 *
 * Works by subtracting the byte offset of @member within @type from @ptr,
 * so @ptr must point at the @member field of a real @type object.
 */
#define list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
/**
 * list_for_each - iterate over a list
 * @pos: the &struct list_head to use as a loop counter.
 * @head: the head for your list.
 *
 * The step reads pos->next after the body has run, so the body must not
 * unlink @pos; use list_for_each_safe() for that.
 */
#define list_for_each(pos, head) \
for (pos = (head)->next; pos != (head); \
pos = pos->next)
/**
 * list_for_each_prev - iterate over a list backwards
 * @pos: the &struct list_head to use as a loop counter.
 * @head: the head for your list.
 *
 * The step reads pos->prev after the body has run, so the body must not
 * unlink @pos.
 */
#define list_for_each_prev(pos, head) \
for (pos = (head)->prev; pos != (head); \
pos = pos->prev)
/**
 * list_for_each_safe - iterate over a list safe against removal of list entry
 * @pos: the &struct list_head to use as a loop counter.
 * @n: another &struct list_head to use as temporary storage
 * @head: the head for your list.
 *
 * @n always holds the successor of @pos, fetched before the loop body
 * runs, so the body may unlink @pos.  Every use of @pos and @n is
 * parenthesised so that any lvalue expression can be passed for them.
 */
#define list_for_each_safe(pos, n, head) \
	for ((pos) = (head)->next, (n) = (pos)->next; (pos) != (head); \
		(pos) = (n), (n) = (pos)->next)
/**
 * list_for_each_entry - iterate over list of given type
 * @pos: the type * to use as a loop counter.
 * @head: the head for your list.
 * @member: the name of the list_struct within the struct.
 * @type: the type of the struct.
 *
 * @pos is derived from each list node with list_entry().  The loop stops
 * when the node embedded in @pos is @head itself.  Every use of @pos is
 * parenthesised so that any lvalue expression can be passed for it.
 */
#define list_for_each_entry(pos, head, member, type) \
	for ((pos) = list_entry((head)->next, type, member); \
		&(pos)->member != (head); \
		(pos) = list_entry((pos)->member.next, type, member))
/**
 * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
 * @pos: the type * to use as a loop counter.
 * @n: another type * to use as temporary storage
 * @head: the head for your list.
 * @member: the name of the list_struct within the struct.
 * @type: the type of the struct.
 *
 * @n is advanced one entry ahead of @pos before the loop body runs, so
 * the body may unlink the entry @pos points to.  Every use of @pos and @n
 * is parenthesised so that any lvalue expression can be passed for them.
 */
#define list_for_each_entry_safe(pos, n, head, member, type) \
	for ((pos) = list_entry((head)->next, type, member), \
		(n) = list_entry((pos)->member.next, type, member); \
		&(pos)->member != (head); \
		(pos) = (n), (n) = list_entry((n)->member.next, type, member))
/**
 * list_for_each_entry_continue - iterate over list of given type
 * continuing after existing point
 * @pos: the type * to use as a loop counter.
 * @head: the head for your list.
 * @member: the name of the list_struct within the struct.
 * @type: the type of the struct.
 *
 * @pos must already point at an entry of the list; iteration starts with
 * the entry after it.  prefetch() is invoked on the following node at
 * every step.  Every use of @pos is parenthesised so that any lvalue
 * expression can be passed for it.
 */
#define list_for_each_entry_continue(pos, head, member, type) \
	for ((pos) = list_entry((pos)->member.next, type, member), \
		prefetch((pos)->member.next); \
		&(pos)->member != (head); \
		(pos) = list_entry((pos)->member.next, type, member), \
		prefetch((pos)->member.next))
#endif
0707010000001F000081A4000003E800000064000000015EF4BCA100000101000000000000000000000000000000000000002000000000cpuminer-2.5.1/example-cfg.json{
"_comment1" : "Any long-format command line argument ",
"_comment2" : "may be used in this JSON configuration file",
"url" : "http://127.0.0.1:9332/",
"user" : "rpcuser",
"pass" : "rpcpass",
"algo" : "scrypt",
"threads" : "4",
"quiet" : true
}
07070100000020000081A4000003E800000064000000015EF4BCA100001B0D000000000000000000000000000000000000001700000000cpuminer-2.5.1/miner.h#ifndef __MINER_H__
#define __MINER_H__
#include "cpuminer-config.h"
#include <stdbool.h>
#include <inttypes.h>
#include <sys/time.h>
#include <pthread.h>
#include <jansson.h>
#include <curl/curl.h>
#ifdef STDC_HEADERS
# include <stdlib.h>
# include <stddef.h>
#else
# ifdef HAVE_STDLIB_H
# include <stdlib.h>
# endif
#endif
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#elif !defined alloca
# ifdef __GNUC__
# define alloca __builtin_alloca
# elif defined _AIX
# define alloca __alloca
# elif defined _MSC_VER
# include <malloc.h>
# define alloca _alloca
# elif !defined HAVE_ALLOCA
# ifdef __cplusplus
extern "C"
# endif
void *alloca (size_t);
# endif
#endif
#ifdef HAVE_SYSLOG_H
#include <syslog.h>
#else
/* Fallback priority names used when <syslog.h> is not available;
 * numbered from 0 in declaration order. */
enum {
	LOG_ERR = 0,
	LOG_WARNING = 1,
	LOG_NOTICE = 2,
	LOG_INFO = 3,
	LOG_DEBUG = 4
};
#endif
/*
 * Branch-prediction hints.  With an optimising GCC-compatible compiler
 * (major version 3 or newer) they map onto __builtin_expect(); otherwise
 * they expand to the bare expression.
 */
#undef likely
#undef unlikely
#if defined(__GNUC__) && (__GNUC__ >= 3) && defined(__OPTIMIZE__)
#define likely(expr) (__builtin_expect(!!(expr), 1))
#define unlikely(expr) (__builtin_expect(!!(expr), 0))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#endif
/* Element count of a true array (not a pointer); kept if already defined. */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))
#endif
/*
 * Use the compiler's byte-swap builtin when the compiler reports itself
 * as GCC 4.3 or newer; otherwise fall back to a shift-and-mask macro.
 */
#if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define WANT_BUILTIN_BSWAP
#else
#define bswap_32(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \
                   | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
#endif

/* Return @v with the order of its four bytes reversed. */
static inline uint32_t swab32(uint32_t v)
{
	uint32_t swapped;

#ifdef WANT_BUILTIN_BSWAP
	swapped = __builtin_bswap32(v);
#else
	swapped = bswap_32(v);
#endif
	return swapped;
}
#ifdef HAVE_SYS_ENDIAN_H
#include <sys/endian.h>
#endif
#if !HAVE_DECL_BE32DEC
/* Read four bytes at @pp as a big-endian 32-bit value (pp[0] is the most
 * significant byte).  Byte-wise access, so @pp needs no alignment. */
static inline uint32_t be32dec(const void *pp)
{
	const uint8_t *bytes = (uint8_t const *)pp;
	uint32_t value = 0;
	int i;

	for (i = 0; i < 4; i++)
		value = (value << 8) | bytes[i];
	return value;
}
#endif
#if !HAVE_DECL_LE32DEC
/* Read four bytes at @pp as a little-endian 32-bit value (pp[0] is the
 * least significant byte).  Byte-wise access, so @pp needs no alignment. */
static inline uint32_t le32dec(const void *pp)
{
	const uint8_t *bytes = (uint8_t const *)pp;
	uint32_t value = 0;
	int i;

	for (i = 3; i >= 0; i--)
		value = (value << 8) | bytes[i];
	return value;
}
#endif
#if !HAVE_DECL_BE32ENC
/* Store @x at @pp as four bytes, most significant byte first. */
static inline void be32enc(void *pp, uint32_t x)
{
	uint8_t *out = (uint8_t *)pp;
	int i;

	/* Peel bytes off the low end and fill the buffer back to front. */
	for (i = 3; i >= 0; i--) {
		out[i] = x & 0xff;
		x >>= 8;
	}
}
#endif
#if !HAVE_DECL_LE32ENC
/* Store @x at @pp as four bytes, least significant byte first. */
static inline void le32enc(void *pp, uint32_t x)
{
	uint8_t *out = (uint8_t *)pp;
	int i;

	/* Peel bytes off the low end and fill the buffer front to back. */
	for (i = 0; i < 4; i++) {
		out[i] = x & 0xff;
		x >>= 8;
	}
}
#endif
/*
 * Wrappers that hide the json_loads()/json_load_file() signature
 * difference between Jansson versions: when JANSSON_MAJOR_VERSION is 2 or
 * more, an extra 0 is passed as the second argument; otherwise (including
 * when the version macro is undefined) the two-argument form is used.
 */
#if JANSSON_MAJOR_VERSION < 2
#define JSON_LOADS(str, err_ptr) json_loads(str, err_ptr)
#define JSON_LOAD_FILE(path, err_ptr) json_load_file(path, err_ptr)
#else
#define JSON_LOADS(str, err_ptr) json_loads(str, 0, err_ptr)
#define JSON_LOAD_FILE(path, err_ptr) json_load_file(path, 0, err_ptr)
#endif
#define USER_AGENT PACKAGE_NAME "/" PACKAGE_VERSION
/* SHA-256 entry points; the implementations live outside this header. */
void sha256_init(uint32_t *state);
void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
void sha256d(unsigned char *hash, const unsigned char *data, int len);

/*
 * Multi-way variants, only declared for assembly-enabled builds.
 * HAVE_SHA256_4WAY / HAVE_SHA256_8WAY tell the rest of the code that the
 * corresponding declarations are available.  The sha256_use_Nway()
 * functions take no arguments and are declared with a proper (void)
 * prototype so that calls are type-checked.
 */
#ifdef USE_ASM
#if defined(__ARM_NEON__) || defined(__ALTIVEC__) || defined(__i386__) || defined(__x86_64__)
#define HAVE_SHA256_4WAY 1
int sha256_use_4way(void);
void sha256_init_4way(uint32_t *state);
void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
#endif
#if defined(__x86_64__) && defined(USE_AVX2)
#define HAVE_SHA256_8WAY 1
int sha256_use_8way(void);
void sha256_init_8way(uint32_t *state);
void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap);
#endif
#endif
/*
 * Hash-scanning entry points, defined outside this header.
 * NOTE(review): the parameter names suggest that pdata is the block
 * header being mined, ptarget the target to compare against, max_nonce
 * the upper nonce bound and *hashes_done an output counter - confirm
 * against the implementations.
 */
extern int scanhash_sha256d(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done);
/* NOTE(review): presumably allocates the scratch buffer that is later
 * passed to scanhash_scrypt() as scratchbuf for the same N - confirm. */
extern unsigned char *scrypt_buffer_alloc(int N);
extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
unsigned char *scratchbuf, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done, int N);
/* Per-thread record: a numeric id, the pthread handle and a pointer to a
 * struct thread_q (the queue type declared further down in this header). */
struct thr_info {
int id;
pthread_t pth;
struct thread_q *q;
};
/*
 * Restart flag plus filler.  The padding array is sized so that the flag
 * and the padding together occupy 128 bytes.
 * NOTE(review): presumably this keeps each element of the work_restart
 * array on its own cache line to avoid false sharing - confirm.
 */
struct work_restart {
volatile unsigned long restart;
char padding[128 - sizeof(unsigned long)];
};
/*
 * Program-wide options and shared state.  These are declarations only;
 * the objects are defined in another translation unit.
 * NOTE(review): the opt_* names presumably mirror command-line options
 * and the want_ / have_ pairs presumably track requested versus detected
 * protocol features - confirm against the definitions.
 */
extern bool opt_debug;
extern bool opt_protocol;
extern bool opt_redirect;
extern int opt_timeout;
extern bool want_longpoll;
extern bool have_longpoll;
extern bool have_gbt;
extern bool allow_getwork;
extern bool want_stratum;
extern bool have_stratum;
extern char *opt_cert;
extern char *opt_proxy;
extern long opt_proxy_type;
extern bool use_syslog;
extern pthread_mutex_t applog_lock;
extern struct thr_info *thr_info;
extern int longpoll_thr_id;
extern int stratum_thr_id;
extern struct work_restart *work_restart;
/* Single-bit flag values.
 * NOTE(review): presumably OR-ed together and passed as the flags
 * argument of json_rpc_call() - confirm. */
#define JSON_RPC_LONGPOLL (1 << 0)
#define JSON_RPC_QUIET_404 (1 << 1)
/* printf-style logging; prio takes the LOG_* priority names. */
extern void applog(int prio, const char *fmt, ...);
extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass,
const char *rpc_req, int *curl_err, int flags);
/* Byte-buffer and hex-string helpers, defined outside this header. */
void memrev(unsigned char *p, size_t len);
extern void bin2hex(char *s, const unsigned char *p, size_t len);
extern char *abin2hex(const unsigned char *p, size_t len);
extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len);
extern int varint_encode(unsigned char *p, uint64_t n);
extern size_t address_to_script(unsigned char *out, size_t outsz, const char *addr);
extern int timeval_subtract(struct timeval *result, struct timeval *x,
struct timeval *y);
/* Target helpers: compare a hash against a target, and derive a target
 * from a difficulty value (semantics per the names - TODO confirm). */
extern bool fulltest(const uint32_t *hash, const uint32_t *target);
extern void diff_to_target(uint32_t *target, double diff);
/*
 * One unit of work received over Stratum.
 * NOTE(review): the fields look like the parameters of a Stratum
 * mining.notify message (job id, previous hash, coinbase, merkle
 * branches, version, nbits, ntime, clean flag) plus the share
 * difficulty - confirm against the parsing code.
 * xnonce2 is a pointer; whether it points into the coinbase buffer or to
 * separate storage cannot be told from this header.
 */
struct stratum_job {
char *job_id;
unsigned char prevhash[32];
size_t coinbase_size;
unsigned char *coinbase;
unsigned char *xnonce2;
int merkle_count;
unsigned char **merkle;
unsigned char version[4];
unsigned char nbits[4];
unsigned char ntime[4];
bool clean;
double diff;
};
/*
 * State of one Stratum connection: URL strings, the libcurl handle and
 * its error buffer, the socket with a receive buffer, the extranonce
 * fields, the current job, and two mutexes.
 * NOTE(review): judging by the names, sock_lock guards the socket and
 * work_lock guards the job/work fields - confirm against the users.
 */
struct stratum_ctx {
char *url;
CURL *curl;
char *curl_url;
char curl_err_str[CURL_ERROR_SIZE];
curl_socket_t sock;
size_t sockbuf_size;
char *sockbuf;
pthread_mutex_t sock_lock;
double next_diff;
char *session_id;
size_t xnonce1_size;
unsigned char *xnonce1;
size_t xnonce2_size;
struct stratum_job job;
pthread_mutex_t work_lock;
};
/*
 * Stratum client operations on a struct stratum_ctx, defined outside this
 * header.  All but stratum_recv_line() and stratum_disconnect() return
 * bool.
 * NOTE(review): true presumably means success, and the line returned by
 * stratum_recv_line() is presumably owned by the caller - confirm.
 */
bool stratum_socket_full(struct stratum_ctx *sctx, int timeout);
bool stratum_send_line(struct stratum_ctx *sctx, char *s);
char *stratum_recv_line(struct stratum_ctx *sctx);
bool stratum_connect(struct stratum_ctx *sctx, const char *url);
void stratum_disconnect(struct stratum_ctx *sctx);
bool stratum_subscribe(struct stratum_ctx *sctx);
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
/*
 * Opaque queue type; only pointers to it are used in this header.
 * tq_pop() takes an absolute-time argument (abstime).
 * NOTE(review): presumably a NULL abstime means wait without a timeout,
 * and tq_freeze()/tq_thaw() presumably suspend and resume the queue -
 * confirm against the implementation.
 */
struct thread_q;
extern struct thread_q *tq_new(void);
extern void tq_free(struct thread_q *tq);
extern bool tq_push(struct thread_q *tq, void *data);
extern void *tq_pop(struct thread_q *tq, const struct timespec *abstime);
extern void tq_freeze(struct thread_q *tq);
extern void tq_thaw(struct thread_q *tq);
#endif /* __MINER_H__ */
07070100000021000081A4000003E800000064000000015EF4BCA100001DB7000000000000000000000000000000000000001800000000cpuminer-2.5.1/minerd.1.TH MINERD 1 "June 2020" "cpuminer 2.5.1"
.SH NAME
minerd \- CPU miner for Bitcoin and Litecoin
.SH SYNOPSIS
.B minerd
[\fIOPTION\fR]...
.SH DESCRIPTION
.B minerd
is a multi-threaded CPU miner for Bitcoin, Litecoin and other cryptocurrencies.
It supports the getwork and getblocktemplate (BIP 22) methods,
as well as the Stratum mining protocol.
.PP
In its normal mode of operation, \fBminerd\fR connects to a mining server
(specified with the \fB\-o\fR option), receives work from it and starts hashing.
As soon as a solution is found, it is submitted to the same mining server,
which can accept or reject it.
When using getwork or getblocktemplate,
\fBminerd\fR can take advantage of long polling, if the server supports it;
in any case, fresh work is fetched as needed.
When using the Stratum protocol this is not possible,
and the server is responsible for sending fresh work at least every minute;
if it fails to do so,
\fBminerd\fR may drop the connection and try to reconnect.
.PP
By default, \fBminerd\fR writes all its messages to standard error.
On systems that have a syslog, the \fB\-\-syslog\fR option can be used
to write to it instead.
.PP
On start, the nice value of all miner threads is set to 19.
On Linux, the scheduling policy is also changed to SCHED_IDLE,
or to SCHED_BATCH if that fails.
On multiprocessor systems, \fBminerd\fR
automatically sets the CPU affinity of miner threads
if the number of threads is a multiple of the number of processors.
.SH EXAMPLES
To connect to a Litecoin mining pool that provides a Stratum server
at example.com on port 3333, authenticating as worker "foo" with password "bar":
.PP
.nf
.RS
minerd \-o stratum+tcp://example.com:3333 \-O foo:bar
.RE
.fi
.PP
To mine to a local Bitcoin testnet instance running on port 18332,
authenticating with username "rpcuser" and password "rpcpass":
.PP
.nf
.RS
minerd \-a sha256d \-o http://localhost:18332 \-O rpcuser:rpcpass \\
\-\-coinbase\-addr=mpXwg4jMtRhuSpVq4xS3HFHmCmWp9NyGKt
.RE
.fi
.PP
To connect to a Litecoin P2Pool node running on my.server on port 9327,
mining in the background and having output sent to the syslog facility,
omitting the per-thread hashmeter output:
.PP
.nf
.RS
minerd \-BSq \-o http://my.server:9327
.RE
.fi
.SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-algo\fR=\fIALGORITHM\fR
Set the hashing algorithm to use.
Default is scrypt.
Possible values are:
.RS 11
.TP 10
.B scrypt
scrypt(1024, 1, 1) (used by Litecoin)
.TP
.B scrypt:\fIN\fR
scrypt(\fIN\fR, 1, 1) (\fIN\fR must be a power of 2 greater than 1)
.TP
.B sha256d
SHA-256d (used by Bitcoin)
.RE
.TP
\fB\-\-benchmark\fR
Run in offline benchmark mode.
.TP
\fB\-B\fR, \fB\-\-background\fR
Run in the background as a daemon.
.TP
\fB\-\-cert\fR=\fIFILE\fR
Set an SSL certificate to use with the mining server.
Only supported when using the HTTPS protocol.
.TP
\fB\-\-coinbase\-addr\fR=\fIADDRESS\fR
Set a payout address for solo mining.
This is only used in getblocktemplate mode,
and only if the server does not provide a coinbase transaction.
It can be either a base-58 address, or a bech32 address (BIP 173).
.TP
\fB\-\-coinbase\-sig\fR=\fITEXT\fR
Set a string to be included in the coinbase (if allowed by the server).
This is only used in getblocktemplate mode.
.TP
\fB\-c\fR, \fB\-\-config\fR=\fIFILE\fR
Load options from a configuration file.
\fIFILE\fR must contain a JSON object
mapping long options to their arguments (as strings),
or to \fBtrue\fR if no argument is required.
Sample configuration file:
.nf
{
"url": "stratum+tcp://example.com:3333",
"userpass": "foo:bar",
"retry-pause": "10",
"quiet": true
}
.fi
.TP
\fB\-D\fR, \fB\-\-debug\fR
Enable debug output.
.TP
\fB\-h\fR, \fB\-\-help\fR
Print a help message and exit.
.TP
\fB\-\-no\-gbt\fR
Do not use the getblocktemplate RPC method.
.TP
\fB\-\-no\-getwork\fR
Do not use the getwork RPC method.
.TP
\fB\-\-no\-longpoll\fR
Do not use long polling.
.TP
\fB\-\-no\-redirect\fR
Ignore requests from the server to switch to a different URL.
.TP
\fB\-\-no\-stratum\fR
Do not switch to Stratum, even if the server advertises support for it.
.TP
\fB\-o\fR, \fB\-\-url\fR=[\fISCHEME\fR://][\fIUSERNAME\fR[:\fIPASSWORD\fR]@]\fIHOST\fR:\fIPORT\fR[/\fIPATH\fR]
Set the URL of the mining server to connect to.
Supported schemes are \fBhttp\fR, \fBhttps\fR, \fBstratum+tcp\fR
and \fBstratum+tcps\fR.
If no scheme is specified, http is assumed.
Specifying a \fIPATH\fR is only supported for HTTP and HTTPS.
Specifying credentials has the same effect as using the \fB\-O\fR option.
By default, on HTTP and HTTPS,
the miner tries to use the getblocktemplate RPC method,
and falls back to using getwork if getblocktemplate is unavailable.
This behavior can be modified by using the \fB\-\-no\-gbt\fR
and \fB\-\-no\-getwork\fR options.
.TP
\fB\-O\fR, \fB\-\-userpass\fR=\fIUSERNAME\fR:\fIPASSWORD\fR
Set the credentials to use for connecting to the mining server.
Any value previously set with \fB\-u\fR or \fB\-p\fR is discarded.
.TP
\fB\-p\fR, \fB\-\-pass\fR=\fIPASSWORD\fR
Set the password to use for connecting to the mining server.
Any password previously set with \fB\-O\fR is discarded.
.TP
\fB\-P\fR, \fB\-\-protocol\-dump\fR
Enable output of all protocol-level activities.
.TP
\fB\-q\fR, \fB\-\-quiet\fR
Disable per-thread hashmeter output.
.TP
\fB\-r\fR, \fB\-\-retries\fR=\fIN\fR
Set the maximum number of times to retry if a network call fails.
If not specified, the miner will retry indefinitely.
.TP
\fB\-R\fR, \fB\-\-retry\-pause\fR=\fISECONDS\fR
Set how long to wait between retries. Default is 30 seconds.
.TP
\fB\-s\fR, \fB\-\-scantime\fR=\fISECONDS\fR
Set an upper bound on the time the miner can go without fetching fresh work.
This setting has no effect in Stratum mode or when long polling is activated.
Default is 5 seconds.
.TP
\fB\-S\fR, \fB\-\-syslog\fR
Log to the syslog facility instead of standard error.
.TP
\fB\-t\fR, \fB\-\-threads\fR=\fIN\fR
Set the number of miner threads.
If not specified, the miner will try to detect the number of available processors
and use that.
.TP
\fB\-T\fR, \fB\-\-timeout\fR=\fISECONDS\fR
Set a timeout for long polling.
.TP
\fB\-u\fR, \fB\-\-user\fR=\fIUSERNAME\fR
Set the username to use for connecting to the mining server.
Any username previously set with \fB\-O\fR is discarded.
.TP
\fB\-V\fR, \fB\-\-version\fR
Display version information and quit.
.TP
\fB\-x\fR, \fB\-\-proxy\fR=[\fISCHEME\fR://][\fIUSERNAME\fR:\fIPASSWORD\fR@]\fIHOST\fR:\fIPORT\fR
Connect to the mining server through a proxy.
Supported schemes are: \fBhttp\fR, \fBsocks4\fR, \fBsocks5\fR.
Since libcurl 7.18.0, the following are also supported:
\fBsocks4a\fR, \fBsocks5h\fR (SOCKS5 with remote name resolving).
If no scheme is specified, the proxy is treated as an HTTP proxy.
.SH ENVIRONMENT
The following environment variables can be specified in lower case or upper case;
the lower-case version has precedence. \fBhttp_proxy\fR is an exception
as it is only available in lower case.
.PP
.RS
.TP
\fBhttp_proxy\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
Sets the proxy server to use for HTTP.
.TP
\fBHTTPS_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
Sets the proxy server to use for HTTPS.
.TP
\fBALL_PROXY\fR [\fISCHEME\fR://]\fIHOST\fR:\fIPORT\fR
Sets the proxy server to use if no protocol-specific proxy is set.
.RE
.PP
Using an environment variable to set the proxy has the same effect as
using the \fB\-x\fR option.
.SH AUTHOR
Most of the code in the current version of minerd was written by
Pooler <pooler@litecoinpool.org> with contributions from others.
The original minerd was written by Jeff Garzik <jeff@garzik.org>.
07070100000022000081ED000003E800000064000000015EF4BCA100000432000000000000000000000000000000000000001A00000000cpuminer-2.5.1/nomacro.pl#!/usr/bin/perl
# Copyright 2012, 2015 pooler@litecoinpool.org
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version. See COPYING for more details.
#
# nomacro.pl - expand assembler macros.
use strict;
use warnings;

# For every *.S file in the current directory: keep (or reuse) a pristine
# copy named "<file>.orig", then regenerate <file> from that copy with all
# .macro/.endm definitions removed and every macro invocation replaced by
# the macro body.
foreach my $f (<*.S>) {
	my $orig = "$f.orig";

	# Only the first run moves the source aside; later runs re-expand
	# from the untouched .orig copy.
	unless (-e $orig) {
		rename $f, $orig
			or die "nomacro.pl: cannot rename $f to $orig: $!\n";
	}
	open my $in, '<', $orig
		or die "nomacro.pl: cannot read $orig: $!\n";
	open my $out, '>', $f
		or die "nomacro.pl: cannot write $f: $!\n";

	my %macros = ();	# finished macros: name => { name, args, body }
	my %m = ();		# macro currently being recorded (empty if none)
	while (<$in>) {
		# Start of a definition: remember name and parameter names.
		if (m/^\.macro\s+(\w+)\s*(.*)$/) {
			$m{name} = $1;
			$m{args} = [split /\s*,\s*/, $2];
			$m{body} = "";
			next;
		}
		# End of a definition: file it away.
		if (m/^\.endm/) {
			$macros{$m{name}} = {%m};
			%m = ();
			next;
		}
		# Invocation of a known macro: replace the line by the body,
		# substituting \param references with the given arguments.
		# This also runs while a macro is being recorded, so macros
		# used inside other macros are expanded at definition time.
		for my $n (keys %macros) {
			if (m/^\s*$n\b\s*(.*)$/) {
				my @a = split /\s*,\s*/, $1;
				$_ = $macros{$n}{body};
				for my $i (0 .. $#{$macros{$n}{args}}) {
					my $arg = $macros{$n}{args}[$i];
					# A missing argument expands to nothing.
					my $val = defined $a[$i] ? $a[$i] : "";
					s/\\$arg\b/$val/g;
				}
				last;
			}
		}
		# Inside a definition the (possibly expanded) line belongs
		# to the body instead of the output.
		if (%m) {
			$m{body} .= $_;
			next;
		}
		print {$out} $_;
	}
	close $out
		or die "nomacro.pl: error while writing $f: $!\n";
	close $in;
}
07070100000023000081A4000003E800000064000000015EF4BCA100005A43000000000000000000000000000000000000001C00000000cpuminer-2.5.1/scrypt-arm.S/*
* Copyright 2012, 2014 pooler@litecoinpool.org
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
/*
 * Architecture groups.  __ARM_ARCH_5E_OR_6__ selects which implementation
 * of the scrypt_shuffle / salsa8_core macros below is assembled.
 */
#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_6__) || \
defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6T2__) || \
defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__)
#define __ARM_ARCH_5E_OR_6__
#endif
/*
 * The wider group additionally covers the ARMv7 variants; it guards the
 * pld prefetch instructions in scrypt_core.
 */
#if defined(__ARM_ARCH_5E_OR_6__) || defined(__ARM_ARCH_7__) || \
defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
#define __ARM_ARCH_5E_OR_6_OR_7__
#endif
#ifdef __ARM_ARCH_5E_OR_6__
/*
 * scrypt_shuffle (ARMv5E/ARMv6 variant)
 *
 * Rearranges, in place, the 32-bit words inside each of the two 64-byte
 * halves of the 128-byte buffer addressed by r0.  The same sequence is
 * applied first at offset 0 and then at offset 64.
 * r0 is preserved; r2-r12 and lr are clobbered.
 */
.macro scrypt_shuffle
add lr, r0, #9*4
ldmia r0, {r2-r7}
ldmia lr, {r2, r8-r12, lr}
str r3, [r0, #5*4]
str r5, [r0, #15*4]
str r6, [r0, #12*4]
str r7, [r0, #1*4]
ldr r5, [r0, #7*4]
str r2, [r0, #13*4]
str r8, [r0, #2*4]
strd r4, [r0, #10*4]
str r9, [r0, #7*4]
str r10, [r0, #4*4]
str r11, [r0, #9*4]
str lr, [r0, #3*4]
/* Second half: same permutation at byte offset 64. */
add r2, r0, #64+0*4
add lr, r0, #64+9*4
ldmia r2, {r2-r7}
ldmia lr, {r2, r8-r12, lr}
str r3, [r0, #64+5*4]
str r5, [r0, #64+15*4]
str r6, [r0, #64+12*4]
str r7, [r0, #64+1*4]
ldr r5, [r0, #64+7*4]
str r2, [r0, #64+13*4]
str r8, [r0, #64+2*4]
strd r4, [r0, #64+10*4]
str r9, [r0, #64+7*4]
str r10, [r0, #64+4*4]
str r11, [r0, #64+9*4]
str lr, [r0, #64+3*4]
.endm
/*
 * salsa8_core_doubleround_body (ARMv5E/ARMv6 variant)
 *
 * Two passes of add / rotate / xor steps over the 16-word state.  Each
 * pass uses the right-rotations 25, 23, 19 and 14 in that order, i.e.
 * left-rotations by 7, 9, 13 and 18.
 * NOTE(review): presumably the column round and row round of one Salsa20
 * double round - confirm.
 *
 * The state lives partly in r0-r12 and lr; the word pairs at sp+2*4,
 * sp+6*4, sp+10*4 and sp+14*4 are exchanged with the stack through
 * ldrd/strd as registers run out.  The caller (salsa8_core) sets up the
 * registers before the first use and fixes them up between uses.
 */
.macro salsa8_core_doubleround_body
add r6, r2, r6
add r7, r3, r7
eor r10, r10, r6, ror #25
add r6, r0, r4
eor r11, r11, r7, ror #25
add r7, r1, r5
strd r10, [sp, #14*4]
eor r12, r12, r6, ror #25
eor lr, lr, r7, ror #25
ldrd r6, [sp, #10*4]
add r2, r10, r2
add r3, r11, r3
eor r6, r6, r2, ror #23
add r2, r12, r0
eor r7, r7, r3, ror #23
add r3, lr, r1
strd r6, [sp, #10*4]
eor r8, r8, r2, ror #23
eor r9, r9, r3, ror #23
ldrd r2, [sp, #6*4]
add r10, r6, r10
add r11, r7, r11
eor r2, r2, r10, ror #19
add r10, r8, r12
eor r3, r3, r11, ror #19
add r11, r9, lr
eor r4, r4, r10, ror #19
eor r5, r5, r11, ror #19
ldrd r10, [sp, #2*4]
add r6, r2, r6
add r7, r3, r7
eor r10, r10, r6, ror #14
add r6, r4, r8
eor r11, r11, r7, ror #14
add r7, r5, r9
eor r0, r0, r6, ror #14
eor r1, r1, r7, ror #14
/* Second pass starts here (rotations 25, 23, 19, 14 again). */
ldrd r6, [sp, #14*4]
strd r2, [sp, #6*4]
strd r10, [sp, #2*4]
add r6, r11, r6
add r7, r0, r7
eor r4, r4, r6, ror #25
add r6, r1, r12
eor r5, r5, r7, ror #25
add r7, r10, lr
eor r2, r2, r6, ror #25
eor r3, r3, r7, ror #25
strd r2, [sp, #6*4]
add r10, r3, r10
ldrd r6, [sp, #10*4]
add r11, r4, r11
eor r8, r8, r10, ror #23
add r10, r5, r0
eor r9, r9, r11, ror #23
add r11, r2, r1
eor r6, r6, r10, ror #23
eor r7, r7, r11, ror #23
strd r6, [sp, #10*4]
add r2, r7, r2
ldrd r10, [sp, #14*4]
add r3, r8, r3
eor r12, r12, r2, ror #19
add r2, r9, r4
eor lr, lr, r3, ror #19
add r3, r6, r5
eor r10, r10, r2, ror #19
eor r11, r11, r3, ror #19
ldrd r2, [sp, #2*4]
add r6, r11, r6
add r7, r12, r7
eor r0, r0, r6, ror #14
add r6, lr, r8
eor r1, r1, r7, ror #14
add r7, r10, r9
eor r2, r2, r6, ror #14
eor r3, r3, r7, ror #14
.endm
/*
 * salsa8_core (ARMv5E/ARMv6 variant)
 *
 * Transforms the 16-word block at sp in place by running
 * salsa8_core_doubleround_body four times.  Words 0-13 are loaded into
 * r0-r12 and lr, then r10/r11 are reloaded with words 14/15.  Between
 * the bodies the register/stack split is re-established with one ldrd
 * and two strd.  At the end the words still held in registers are
 * written back.
 * Clobbers r0-r12 and lr.
 */
.macro salsa8_core
ldmia sp, {r0-r12, lr}
ldrd r10, [sp, #14*4]
salsa8_core_doubleround_body
ldrd r6, [sp, #6*4]
strd r2, [sp, #2*4]
strd r10, [sp, #14*4]
salsa8_core_doubleround_body
ldrd r6, [sp, #6*4]
strd r2, [sp, #2*4]
strd r10, [sp, #14*4]
salsa8_core_doubleround_body
ldrd r6, [sp, #6*4]
strd r2, [sp, #2*4]
strd r10, [sp, #14*4]
salsa8_core_doubleround_body
stmia sp, {r0-r5}
strd r8, [sp, #8*4]
str r12, [sp, #12*4]
str lr, [sp, #13*4]
strd r10, [sp, #14*4]
.endm
#else
/*
 * scrypt_shuffle (cores outside the ARMv5E/ARMv6 group): expands to
 * nothing, so no word reordering is done around scrypt_core here.
 */
.macro scrypt_shuffle
.endm
/*
 * salsa8_core_doubleround_body (cores outside the ARMv5E/ARMv6 group)
 *
 * Middle section of a pass pair of add / rotate / xor steps.  The steps
 * in this body use the right-rotations 23, 19, 14, 25, 23 and 19 (left
 * rotations 9, 13, 18, 7, 9, 13).  The "ror #25" step that precedes the
 * body and the "ror #14" step that follows it are emitted by the caller,
 * salsa8_core, which is why the body neither starts nor ends on a pass
 * boundary.
 *
 * State words 0-7 stay in r0-r7.  The words at sp+8*4 ... sp+15*4 are
 * moved between r8-r12/lr and the stack with ldr/str; the loads and
 * stores are interleaved with the arithmetic.
 */
.macro salsa8_core_doubleround_body
ldr r8, [sp, #8*4]
add r11, r11, r10
ldr lr, [sp, #13*4]
add r12, r12, r3
eor r2, r2, r11, ror #23
add r11, r4, r0
eor r7, r7, r12, ror #23
add r12, r9, r5
str r9, [sp, #9*4]
eor r8, r8, r11, ror #23
str r10, [sp, #14*4]
eor lr, lr, r12, ror #23
ldr r11, [sp, #11*4]
add r9, lr, r9
ldr r12, [sp, #12*4]
add r10, r2, r10
eor r1, r1, r9, ror #19
add r9, r7, r3
eor r6, r6, r10, ror #19
add r10, r8, r4
str r8, [sp, #8*4]
eor r11, r11, r9, ror #19
str lr, [sp, #13*4]
eor r12, r12, r10, ror #19
ldr r9, [sp, #10*4]
add r8, r12, r8
ldr r10, [sp, #15*4]
add lr, r1, lr
eor r0, r0, r8, ror #14
add r8, r6, r2
eor r5, r5, lr, ror #14
add lr, r11, r7
eor r9, r9, r8, ror #14
ldr r8, [sp, #9*4]
eor r10, r10, lr, ror #14
ldr lr, [sp, #14*4]
add r8, r9, r8
str r9, [sp, #10*4]
add lr, r10, lr
str r10, [sp, #15*4]
eor r11, r11, r8, ror #25
add r8, r0, r3
eor r12, r12, lr, ror #25
add lr, r5, r4
eor r1, r1, r8, ror #25
ldr r8, [sp, #8*4]
eor r6, r6, lr, ror #25
add r9, r11, r9
ldr lr, [sp, #13*4]
add r10, r12, r10
eor r8, r8, r9, ror #23
add r9, r1, r0
eor lr, lr, r10, ror #23
add r10, r6, r5
str r11, [sp, #11*4]
eor r2, r2, r9, ror #23
str r12, [sp, #12*4]
eor r7, r7, r10, ror #23
ldr r9, [sp, #9*4]
add r11, r8, r11
ldr r10, [sp, #14*4]
add r12, lr, r12
eor r9, r9, r11, ror #19
add r11, r2, r1
eor r10, r10, r12, ror #19
add r12, r7, r6
str r8, [sp, #8*4]
eor r3, r3, r11, ror #19
str lr, [sp, #13*4]
eor r4, r4, r12, ror #19
.endm
/*
 * salsa8_core (cores outside the ARMv5E/ARMv6 group)
 *
 * Transforms the 16-word block at sp in place.  Words 0-7 are loaded
 * into r0-r7 and written back by the final stmia; the remaining words
 * are exchanged with the stack as needed.
 *
 * Structure: an opening "ror #25" step, then four uses of
 * salsa8_core_doubleround_body.  After each of the first three bodies a
 * "ror #14" step closes the current pass and a "ror #25" step opens the
 * next one; after the fourth body only the closing "ror #14" step and
 * the final stores follow.
 * Clobbers r0-r12 and lr.
 */
.macro salsa8_core
ldmia sp, {r0-r7}
ldr r12, [sp, #15*4]
ldr r8, [sp, #11*4]
ldr lr, [sp, #12*4]
ldr r9, [sp, #9*4]
add r8, r8, r12
ldr r11, [sp, #10*4]
add lr, lr, r0
eor r3, r3, r8, ror #25
add r8, r5, r1
ldr r10, [sp, #14*4]
eor r4, r4, lr, ror #25
add lr, r11, r6
eor r9, r9, r8, ror #25
eor r10, r10, lr, ror #25
salsa8_core_doubleround_body
ldr r11, [sp, #10*4]
add r8, r9, r8
ldr r12, [sp, #15*4]
add lr, r10, lr
eor r11, r11, r8, ror #14
add r8, r3, r2
eor r12, r12, lr, ror #14
add lr, r4, r7
eor r0, r0, r8, ror #14
ldr r8, [sp, #11*4]
eor r5, r5, lr, ror #14
ldr lr, [sp, #12*4]
add r8, r8, r12
str r11, [sp, #10*4]
add lr, lr, r0
str r12, [sp, #15*4]
eor r3, r3, r8, ror #25
add r8, r5, r1
eor r4, r4, lr, ror #25
add lr, r11, r6
str r9, [sp, #9*4]
eor r9, r9, r8, ror #25
str r10, [sp, #14*4]
eor r10, r10, lr, ror #25
salsa8_core_doubleround_body
ldr r11, [sp, #10*4]
add r8, r9, r8
ldr r12, [sp, #15*4]
add lr, r10, lr
eor r11, r11, r8, ror #14
add r8, r3, r2
eor r12, r12, lr, ror #14
add lr, r4, r7
eor r0, r0, r8, ror #14
ldr r8, [sp, #11*4]
eor r5, r5, lr, ror #14
ldr lr, [sp, #12*4]
add r8, r8, r12
str r11, [sp, #10*4]
add lr, lr, r0
str r12, [sp, #15*4]
eor r3, r3, r8, ror #25
add r8, r5, r1
eor r4, r4, lr, ror #25
add lr, r11, r6
str r9, [sp, #9*4]
eor r9, r9, r8, ror #25
str r10, [sp, #14*4]
eor r10, r10, lr, ror #25
salsa8_core_doubleround_body
ldr r11, [sp, #10*4]
add r8, r9, r8
ldr r12, [sp, #15*4]
add lr, r10, lr
eor r11, r11, r8, ror #14
add r8, r3, r2
eor r12, r12, lr, ror #14
add lr, r4, r7
eor r0, r0, r8, ror #14
ldr r8, [sp, #11*4]
eor r5, r5, lr, ror #14
ldr lr, [sp, #12*4]
add r8, r8, r12
str r11, [sp, #10*4]
add lr, lr, r0
str r12, [sp, #15*4]
eor r3, r3, r8, ror #25
add r8, r5, r1
eor r4, r4, lr, ror #25
add lr, r11, r6
str r9, [sp, #9*4]
eor r9, r9, r8, ror #25
str r10, [sp, #14*4]
eor r10, r10, lr, ror #25
salsa8_core_doubleround_body
/* Closing "ror #14" step and write-back of the words held in registers. */
ldr r11, [sp, #10*4]
add r8, r9, r8
ldr r12, [sp, #15*4]
add lr, r10, lr
str r9, [sp, #9*4]
eor r11, r11, r8, ror #14
eor r12, r12, lr, ror #14
add r8, r3, r2
str r10, [sp, #14*4]
add lr, r4, r7
str r11, [sp, #10*4]
eor r0, r0, r8, ror #14
str r12, [sp, #15*4]
eor r5, r5, lr, ror #14
stmia sp, {r0-r7}
.endm
#endif
/*
 * scrypt_core_macro1a_x4 - process four words.
 *
 * Loads four words from [r0] and four from [lr]; stores both groups
 * unchanged to [r1] and [r3]; then stores the XOR of the two groups to
 * [r0] and to [r12].  lr, r1, r3, r0 and r12 each advance by 16 bytes.
 * Clobbers r4-r11.
 */
.macro scrypt_core_macro1a_x4
ldmia r0, {r4-r7}
ldmia lr!, {r8-r11}
stmia r1!, {r4-r7}
stmia r3!, {r8-r11}
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
stmia r0!, {r4-r7}
stmia r12!, {r4-r7}
.endm
/*
 * scrypt_core_macro1b_x4 - process four words.
 *
 * [r2] ^= [r3]                 (result kept in r8-r11)
 * [r0]  = [r0] ^ [r2] ^ [r1]   (using the updated [r2]); the same value
 *                              is also stored to [r12]
 * r3, r2, r1, r0 and r12 each advance by 16 bytes.  Clobbers r4-r11.
 */
.macro scrypt_core_macro1b_x4
ldmia r3!, {r8-r11}
ldmia r2, {r4-r7}
eor r8, r8, r4
eor r9, r9, r5
eor r10, r10, r6
eor r11, r11, r7
ldmia r0, {r4-r7}
stmia r2!, {r8-r11}
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
ldmia r1!, {r8-r11}
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
stmia r0!, {r4-r7}
stmia r12!, {r4-r7}
.endm
/*
 * scrypt_core_macro2_x4 - process four words.
 *
 * [r0] = [r12] + [r0]          (word-wise addition)
 * [r2] = [r0] ^ [r2]           (using the updated [r0]); the same value
 *                              is also stored to [r12]
 * r0, r2 and r12 each advance by 16 bytes.  Clobbers r4-r11.
 */
.macro scrypt_core_macro2_x4
ldmia r12, {r4-r7}
ldmia r0, {r8-r11}
add r4, r4, r8
add r5, r5, r9
add r6, r6, r10
add r7, r7, r11
stmia r0!, {r4-r7}
ldmia r2, {r8-r11}
eor r4, r4, r8
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
stmia r2!, {r4-r7}
stmia r12!, {r4-r7}
.endm
/*
 * scrypt_core_macro3_x4 - process four words.
 *
 * [r0] = [r1] + [r0] (word-wise addition); r1 and r0 advance by 16 bytes.
 * The four sums are left in r4-r7; scrypt_core reads r4 afterwards.
 * Clobbers r4-r11.
 */
.macro scrypt_core_macro3_x4
ldmia r1!, {r4-r7}
ldmia r0, {r8-r11}
add r4, r4, r8
add r5, r5, r9
add r6, r6, r10
add r7, r7, r11
stmia r0!, {r4-r7}
.endm
/*
 * scrypt_core_macro3_x6 - process six words.
 *
 * [r0] = [r1] + [r0] (word-wise addition); r1 and r0 advance by 24 bytes.
 * Clobbers r2-r12 and lr.
 */
.macro scrypt_core_macro3_x6
ldmia r1!, {r2-r7}
ldmia r0, {r8-r12, lr}
add r2, r2, r8
add r3, r3, r9
add r4, r4, r10
add r5, r5, r11
add r6, r6, r12
add r7, r7, lr
stmia r0!, {r2-r7}
.endm
/*
 * scrypt_core
 *
 * Inputs: r0 = pointer to a 32-word (128-byte) block, called X below;
 *         r1 = pointer to a scratch area, called V below;
 *         r2 = count N.
 * NOTE(review): presumably called from C as scrypt_core(X, V, N) with N
 * a power of two - the prototype is not visible in this file; confirm.
 *
 * The block is updated in place.  r4-r11 are saved and restored.
 *
 * Stack frame (sp is aligned down to 64 bytes):
 *   words 0-15  working block handed to salsa8_core
 *   word 16     X pointer
 *   word 17     V pointer (current entry in loop 1, base in loop 2)
 *   word 18     end-of-V pointer (loop 1) / remaining iterations (loop 2)
 *   word 19     address of the V entry selected for the next iteration
 *   word 20     stack pointer value to restore on exit
 *   word 21     N, later N - 1 (used as index mask)
 */
.text
.code 32
.align 2
.globl scrypt_core
.globl _scrypt_core
#ifdef __ELF__
.type scrypt_core, %function
#endif
scrypt_core:
_scrypt_core:
stmfd sp!, {r4-r11, lr}
mov r12, sp
sub sp, sp, #22*4
bic sp, sp, #63
str r12, [sp, #20*4]
str r2, [sp, #21*4]
scrypt_shuffle
ldr r2, [sp, #21*4]
str r0, [sp, #16*4]
/* r12 = V + N * 128: loop 1 ends when the V pointer reaches it. */
add r12, r1, r2, lsl #7
str r12, [sp, #18*4]
/*
 * Loop 1: copy X to the current 128-byte V entry, then mix X.
 * Mixing: X[0..15] ^= X[16..31], salsa8, X[0..15] += result,
 *         X[16..31] ^= X[0..15], salsa8, X[16..31] += result.
 */
scrypt_core_loop1:
add lr, r0, #16*4
add r3, r1, #16*4
mov r12, sp
scrypt_core_macro1a_x4
scrypt_core_macro1a_x4
scrypt_core_macro1a_x4
scrypt_core_macro1a_x4
str r1, [sp, #17*4]
salsa8_core
ldr r0, [sp, #16*4]
mov r12, sp
add r2, r0, #16*4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
salsa8_core
ldr r0, [sp, #16*4]
mov r1, sp
add r0, r0, #16*4
scrypt_core_macro3_x6
scrypt_core_macro3_x6
ldr r3, [sp, #17*4]
ldr r12, [sp, #18*4]
scrypt_core_macro3_x4
add r1, r3, #16*4
sub r0, r0, #32*4
cmp r1, r12
bne scrypt_core_loop1
/*
 * Set up loop 2: r12 = N (iteration count), mask N - 1 saved in word 21,
 * V base saved in word 17, r1 = V + (X[16] & (N - 1)) * 128.
 */
ldr r12, [sp, #21*4]
ldr r4, [r0, #16*4]
sub r2, r12, #1
str r2, [sp, #21*4]
sub r1, r1, r12, lsl #7
str r1, [sp, #17*4]
and r4, r4, r2
add r1, r1, r4, lsl #7
/*
 * Loop 2 (N iterations): X ^= selected V entry, then the same mixing as
 * in loop 1.  The next V entry is chosen from the new X[16] as soon as
 * it is available (r4 after scrypt_core_macro3_x4).
 */
scrypt_core_loop2:
add r2, r0, #16*4
add r3, r1, #16*4
str r12, [sp, #18*4]
mov r12, sp
#ifdef __ARM_ARCH_5E_OR_6_OR_7__
pld [r1, #24*4]
pld [r1, #8*4]
#endif
scrypt_core_macro1b_x4
scrypt_core_macro1b_x4
scrypt_core_macro1b_x4
scrypt_core_macro1b_x4
salsa8_core
ldr r0, [sp, #16*4]
mov r12, sp
add r2, r0, #16*4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
salsa8_core
ldr r0, [sp, #16*4]
mov r1, sp
ldr r3, [sp, #17*4]
add r0, r0, #16*4
ldr r2, [sp, #21*4]
scrypt_core_macro3_x4
and r4, r4, r2
add r3, r3, r4, lsl #7
str r3, [sp, #19*4]
#ifdef __ARM_ARCH_5E_OR_6_OR_7__
pld [r3, #16*4]
pld [r3]
#endif
scrypt_core_macro3_x6
scrypt_core_macro3_x6
ldr r12, [sp, #18*4]
sub r0, r0, #32*4
ldr r1, [sp, #19*4]
subs r12, r12, #1
bne scrypt_core_loop2
scrypt_shuffle
/* Restore the stack pointer saved on entry and return. */
ldr sp, [sp, #20*4]
#ifdef __thumb__
ldmfd sp!, {r4-r11, lr}
bx lr
#else
ldmfd sp!, {r4-r11, pc}
#endif
#ifdef __ARM_NEON__
/*
 * salsa8_core_3way_doubleround
 *
 * Performs the same kind of two-pass add / rotate / xor sequence on three
 * independent states at once, with the instruction streams interleaved:
 *   - one state in the ARM core registers r0-r12/lr plus stack spill
 *     slots at sp+2*4, sp+6*4, sp+10*4 and sp+14*4;
 *   - one state in NEON registers q0-q3;
 *   - one state in NEON registers q8-q11.
 * q4-q7 are NEON temporaries.  The ARM side rotates with "ror #25/23/19/14";
 * the NEON side builds the matching left-rotations by 7, 9, 13 and 18
 * from a vshl / vshr pair XORed into the destination.  The vext and vswp
 * instructions rearrange the lanes of the NEON state between steps.
 *
 * Unlike the scalar salsa8_core_doubleround_body, this macro reloads
 * r6/r7 from sp+6*4 at its start and stores r2/r3 and r10/r11 at its end,
 * so it can be repeated back to back.
 */
.macro salsa8_core_3way_doubleround
ldrd r6, [sp, #6*4]
vadd.u32 q4, q0, q1
add r6, r2, r6
vadd.u32 q6, q8, q9
add r7, r3, r7
vshl.u32 q5, q4, #7
eor r10, r10, r6, ror #25
vshl.u32 q7, q6, #7
add r6, r0, r4
vshr.u32 q4, q4, #32-7
eor r11, r11, r7, ror #25
vshr.u32 q6, q6, #32-7
add r7, r1, r5
veor.u32 q3, q3, q5
strd r10, [sp, #14*4]
veor.u32 q11, q11, q7
eor r12, r12, r6, ror #25
veor.u32 q3, q3, q4
eor lr, lr, r7, ror #25
veor.u32 q11, q11, q6
ldrd r6, [sp, #10*4]
vadd.u32 q4, q3, q0
add r2, r10, r2
vadd.u32 q6, q11, q8
add r3, r11, r3
vshl.u32 q5, q4, #9
eor r6, r6, r2, ror #23
vshl.u32 q7, q6, #9
add r2, r12, r0
vshr.u32 q4, q4, #32-9
eor r7, r7, r3, ror #23
vshr.u32 q6, q6, #32-9
add r3, lr, r1
veor.u32 q2, q2, q5
strd r6, [sp, #10*4]
veor.u32 q10, q10, q7
eor r8, r8, r2, ror #23
veor.u32 q2, q2, q4
eor r9, r9, r3, ror #23
veor.u32 q10, q10, q6
ldrd r2, [sp, #6*4]
vadd.u32 q4, q2, q3
add r10, r6, r10
vadd.u32 q6, q10, q11
add r11, r7, r11
vext.u32 q3, q3, q3, #3
eor r2, r2, r10, ror #19
vshl.u32 q5, q4, #13
add r10, r8, r12
vext.u32 q11, q11, q11, #3
eor r3, r3, r11, ror #19
vshl.u32 q7, q6, #13
add r11, r9, lr
vshr.u32 q4, q4, #32-13
eor r4, r4, r10, ror #19
vshr.u32 q6, q6, #32-13
eor r5, r5, r11, ror #19
veor.u32 q1, q1, q5
veor.u32 q9, q9, q7
veor.u32 q1, q1, q4
veor.u32 q9, q9, q6
ldrd r10, [sp, #2*4]
vadd.u32 q4, q1, q2
add r6, r2, r6
vadd.u32 q6, q9, q10
add r7, r3, r7
vswp.u32 d4, d5
eor r10, r10, r6, ror #14
vshl.u32 q5, q4, #18
add r6, r4, r8
vswp.u32 d20, d21
eor r11, r11, r7, ror #14
vshl.u32 q7, q6, #18
add r7, r5, r9
vshr.u32 q4, q4, #32-18
eor r0, r0, r6, ror #14
vshr.u32 q6, q6, #32-18
eor r1, r1, r7, ror #14
veor.u32 q0, q0, q5
ldrd r6, [sp, #14*4]
veor.u32 q8, q8, q7
veor.u32 q0, q0, q4
veor.u32 q8, q8, q6
strd r2, [sp, #6*4]
vadd.u32 q4, q0, q3
strd r10, [sp, #2*4]
vadd.u32 q6, q8, q11
add r6, r11, r6
vext.u32 q1, q1, q1, #1
add r7, r0, r7
vshl.u32 q5, q4, #7
eor r4, r4, r6, ror #25
vext.u32 q9, q9, q9, #1
add r6, r1, r12
vshl.u32 q7, q6, #7
eor r5, r5, r7, ror #25
vshr.u32 q4, q4, #32-7
add r7, r10, lr
vshr.u32 q6, q6, #32-7
eor r2, r2, r6, ror #25
veor.u32 q1, q1, q5
eor r3, r3, r7, ror #25
veor.u32 q9, q9, q7
strd r2, [sp, #6*4]
veor.u32 q1, q1, q4
veor.u32 q9, q9, q6
add r10, r3, r10
vadd.u32 q4, q1, q0
ldrd r6, [sp, #10*4]
vadd.u32 q6, q9, q8
add r11, r4, r11
vshl.u32 q5, q4, #9
eor r8, r8, r10, ror #23
vshl.u32 q7, q6, #9
add r10, r5, r0
vshr.u32 q4, q4, #32-9
eor r9, r9, r11, ror #23
vshr.u32 q6, q6, #32-9
add r11, r2, r1
veor.u32 q2, q2, q5
eor r6, r6, r10, ror #23
veor.u32 q10, q10, q7
eor r7, r7, r11, ror #23
veor.u32 q2, q2, q4
strd r6, [sp, #10*4]
veor.u32 q10, q10, q6
add r2, r7, r2
vadd.u32 q4, q2, q1
ldrd r10, [sp, #14*4]
vadd.u32 q6, q10, q9
add r3, r8, r3
vext.u32 q1, q1, q1, #3
eor r12, r12, r2, ror #19
vshl.u32 q5, q4, #13
add r2, r9, r4
vext.u32 q9, q9, q9, #3
eor lr, lr, r3, ror #19
vshl.u32 q7, q6, #13
add r3, r6, r5
vshr.u32 q4, q4, #32-13
eor r10, r10, r2, ror #19
vshr.u32 q6, q6, #32-13
eor r11, r11, r3, ror #19
veor.u32 q3, q3, q5
veor.u32 q11, q11, q7
veor.u32 q3, q3, q4
veor.u32 q11, q11, q6
ldrd r2, [sp, #2*4]
vadd.u32 q4, q3, q2
add r6, r11, r6
vadd.u32 q6, q11, q10
add r7, r12, r7
vswp.u32 d4, d5
eor r0, r0, r6, ror #14
vshl.u32 q5, q4, #18
add r6, lr, r8
vswp.u32 d20, d21
eor r1, r1, r7, ror #14
vshl.u32 q7, q6, #18
add r7, r10, r9
vext.u32 q3, q3, q3, #1
eor r2, r2, r6, ror #14
vshr.u32 q4, q4, #32-18
eor r3, r3, r7, ror #14
vshr.u32 q6, q6, #32-18
strd r2, [sp, #2*4]
vext.u32 q11, q11, q11, #1
strd r10, [sp, #14*4]
veor.u32 q0, q0, q5
veor.u32 q8, q8, q7
veor.u32 q0, q0, q4
veor.u32 q8, q8, q6
.endm
/*
 * salsa8_core_3way
 *
 * Runs salsa8_core_3way_doubleround four times.  The ARM-register state
 * is loaded from the 16-word block at sp (words 0-13 into r0-r12/lr, then
 * r10/r11 reloaded with words 14/15) and the words still held in
 * registers are written back at the end; words 14/15 are already stored
 * by the last doubleround.  The two NEON states are expected in q0-q3 and
 * q8-q11 on entry and are left there on exit.
 * Clobbers r0-r12, lr and q4-q7.
 */
.macro salsa8_core_3way
ldmia sp, {r0-r12, lr}
ldrd r10, [sp, #14*4]
salsa8_core_3way_doubleround
salsa8_core_3way_doubleround
salsa8_core_3way_doubleround
salsa8_core_3way_doubleround
stmia sp, {r0-r5}
strd r8, [sp, #8*4]
str r12, [sp, #12*4]
str lr, [sp, #13*4]
.endm
/*
 * scrypt_core_3way: process three 128-byte scrypt states at once, one with
 * the ARM integer registers and two with NEON.
 *
 * Register arguments as used by the code below:
 *   r0 - pointer to 3 * 128 bytes of state; read at entry, rewritten at exit
 *   r1 - scratchpad base; three regions of (r2 << 7) bytes each are used
 *   r2 - entry count N; N - 1 is used as an index mask
 *        (presumably N is a power of two -- confirm with callers)
 *
 * Stack frame (sp is aligned down to 64 bytes, 24*16 bytes reserved):
 *   sp + 0   .. sp + 63   16 words used by salsa8_core_3way
 *   sp + 64  .. sp + 83   saved values: r0, current r1, loop limit/counter,
 *                         N, original sp
 *   sp + 128 .. sp + 255  working copy of the second state
 *   sp + 256 .. sp + 383  working copy of the third state
 *
 * NOTE(review): the scrypt_core_macro* helpers are defined outside this
 * excerpt; remarks about them below are inferred from how their results
 * are used here.
 */
.text
.code 32
.align 2
.globl scrypt_core_3way
.globl _scrypt_core_3way
#ifdef __ELF__
.type scrypt_core_3way, %function
#endif
scrypt_core_3way:
_scrypt_core_3way:
/* Save callee-saved integer and NEON registers, then build the frame. */
stmfd sp!, {r4-r11, lr}
vpush {q4-q7}
mov r12, sp
sub sp, sp, #24*16
bic sp, sp, #63
/* Remember N and the unaligned stack pointer for the epilogue. */
str r2, [sp, #4*16+3*4]
str r12, [sp, #4*16+4*4]
/*
 * Permute the words of the first state. The mask has the low word of every
 * 64-bit lane set, so each vbif keeps the destination's low words and takes
 * the high words from the second operand; the vswp instructions then
 * exchange d-register halves. The result is stored back in place at r0.
 */
mov r3, r0
vldmia r3!, {q8-q15}
vmov.u64 q0, #0xffffffff
vmov.u32 q1, q8
vmov.u32 q2, q12
vbif.u32 q8, q9, q0
vbif.u32 q12, q13, q0
vbif.u32 q9, q10, q0
vbif.u32 q13, q14, q0
vbif.u32 q10, q11, q0
vbif.u32 q14, q15, q0
vbif.u32 q11, q1, q0
vbif.u32 q15, q2, q0
vldmia r3!, {q0-q7}
vswp.u32 d17, d21
vswp.u32 d25, d29
vswp.u32 d18, d22
vswp.u32 d26, d30
vstmia r0, {q8-q15}
/* Same permutation for the second state; result goes to sp + 128. */
vmov.u64 q8, #0xffffffff
vmov.u32 q9, q0
vmov.u32 q10, q4
vbif.u32 q0, q1, q8
vbif.u32 q4, q5, q8
vbif.u32 q1, q2, q8
vbif.u32 q5, q6, q8
vbif.u32 q2, q3, q8
vbif.u32 q6, q7, q8
vbif.u32 q3, q9, q8
vbif.u32 q7, q10, q8
vldmia r3, {q8-q15}
vswp.u32 d1, d5
vswp.u32 d9, d13
vswp.u32 d2, d6
vswp.u32 d10, d14
add r12, sp, #8*16
vstmia r12!, {q0-q7}
/* Same permutation for the third state; result goes to sp + 256. */
vmov.u64 q0, #0xffffffff
vmov.u32 q1, q8
vmov.u32 q2, q12
vbif.u32 q8, q9, q0
vbif.u32 q12, q13, q0
vbif.u32 q9, q10, q0
vbif.u32 q13, q14, q0
vbif.u32 q10, q11, q0
vbif.u32 q14, q15, q0
vbif.u32 q11, q1, q0
vbif.u32 q15, q2, q0
vswp.u32 d17, d21
vswp.u32 d25, d29
vswp.u32 d18, d22
vswp.u32 d26, d30
vstmia r12, {q8-q15}
/* q0-q7 = second state, q8-q15 = third state (still in registers). */
add lr, sp, #128
vldmia lr, {q0-q7}
/* r2 = r1 + N*128: value of r1 at which the first loop terminates. */
add r2, r1, r2, lsl #7
str r0, [sp, #4*16+0*4]
str r2, [sp, #4*16+2*4]
/*
 * First loop: one scratchpad entry per state per iteration; r1 advances by
 * 128 bytes each time until it reaches the limit computed above.
 */
scrypt_core_3way_loop1:
add lr, r0, #16*4
add r3, r1, #16*4
str r1, [sp, #4*16+1*4]
mov r12, sp
/* Integer-register state: handled by the macro1a helper (see NOTE above). */
scrypt_core_macro1a_x4
scrypt_core_macro1a_x4
scrypt_core_macro1a_x4
ldr r2, [sp, #4*16+3*4]
scrypt_core_macro1a_x4
/* Store the second and third states into their own scratchpad regions,
 * located N*128 and 2*N*128 bytes above the first state's entry. */
sub r1, r1, #4*16
add r1, r1, r2, lsl #7
vstmia r1, {q0-q7}
add r3, r1, r2, lsl #7
vstmia r3, {q8-q15}
/* XOR the second half of each NEON state into its first half and save the
 * result (the salsa input) at sp + 128 / sp + 256. */
add lr, sp, #128
veor.u32 q0, q0, q4
veor.u32 q1, q1, q5
veor.u32 q2, q2, q6
veor.u32 q3, q3, q7
vstmia lr, {q0-q3}
veor.u32 q8, q8, q12
veor.u32 q9, q9, q13
veor.u32 q10, q10, q14
veor.u32 q11, q11, q15
add r12, sp, #256
vstmia r12, {q8-q11}
salsa8_core_3way
ldr r0, [sp, #4*16+0*4]
mov r12, sp
add r2, r0, #16*4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
/* Add the saved salsa input to the salsa output to form the new first
 * halves, and store them back at sp + 128 / sp + 256. */
add lr, sp, #128
vldmia lr, {q4-q7}
vadd.u32 q4, q4, q0
vadd.u32 q5, q5, q1
vadd.u32 q6, q6, q2
vadd.u32 q7, q7, q3
add r12, sp, #256
vldmia r12, {q0-q3}
vstmia lr, {q4-q7}
vadd.u32 q8, q8, q0
vadd.u32 q9, q9, q1
vadd.u32 q10, q10, q2
vadd.u32 q11, q11, q3
/* XOR the new first halves into the second halves; the second state's
 * result is saved at sp + 192, the third's is kept in q12-q15. */
add r4, sp, #128+4*16
vldmia r4, {q0-q3}
vstmia r12, {q8-q11}
veor.u32 q0, q0, q4
veor.u32 q1, q1, q5
veor.u32 q2, q2, q6
veor.u32 q3, q3, q7
vstmia r4, {q0-q3}
veor.u32 q8, q8, q12
veor.u32 q9, q9, q13
veor.u32 q10, q10, q14
veor.u32 q11, q11, q15
vmov q12, q8
vmov q13, q9
vmov q14, q10
vmov q15, q11
salsa8_core_3way
ldr r0, [sp, #4*16+0*4]
mov r1, sp
add r0, r0, #16*4
scrypt_core_macro3_x6
scrypt_core_macro3_x6
scrypt_core_macro3_x4
sub r0, r0, #8*16
ldr r1, [sp, #4*16+1*4]
ldr r2, [sp, #4*16+2*4]
/* Finish the second halves (saved input + salsa output) and reload the
 * first halves so q0-q7 / q8-q15 again hold the complete NEON states. */
add lr, sp, #128
add r4, sp, #128+4*16
vldmia r4, {q4-q7}
vadd.u32 q4, q4, q0
vadd.u32 q5, q5, q1
vadd.u32 q6, q6, q2
vadd.u32 q7, q7, q3
vstmia r4, {q4-q7}
vldmia lr, {q0-q3}
vadd.u32 q12, q12, q8
vadd.u32 q13, q13, q9
vadd.u32 q14, q14, q10
vadd.u32 q15, q15, q11
add r12, sp, #256
vldmia r12, {q8-q11}
/* Next scratchpad entry; loop until the limit is reached. */
add r1, r1, #8*16
cmp r1, r2
bne scrypt_core_3way_loop1
/* Between the loops: r2 = N becomes the second loop's counter, the third
 * state's second half is spilled to sp + 320, and r1 is rewound to the
 * scratchpad base. */
ldr r2, [sp, #4*16+3*4]
add r5, sp, #256+4*16
vstmia r5, {q12-q15}
sub r1, r1, r2, lsl #7
str r1, [sp, #4*16+1*4]
/*
 * Second loop: N iterations. Each state selects a scratchpad entry with
 * (word 16 of that state) & (N - 1) and mixes it in.
 */
scrypt_core_3way_loop2:
str r2, [sp, #4*16+2*4]
ldr r0, [sp, #4*16+0*4]
ldr r1, [sp, #4*16+1*4]
ldr r2, [sp, #4*16+3*4]
/* Integer-register state: index taken from the word at r0 + 64. */
ldr r4, [r0, #16*4]
sub r2, r2, #1
and r4, r4, r2
add r1, r1, r4, lsl #7
add r2, r0, #16*4
add r3, r1, #16*4
mov r12, sp
scrypt_core_macro1b_x4
scrypt_core_macro1b_x4
scrypt_core_macro1b_x4
scrypt_core_macro1b_x4
/* r1 / r3 = scratchpad regions of the second / third state. */
ldr r1, [sp, #4*16+1*4]
ldr r2, [sp, #4*16+3*4]
add r1, r1, r2, lsl #7
add r3, r1, r2, lsl #7
sub r2, r2, #1
/* Second state: index from the first word of q4; r6 = entry address. */
vmov r6, r7, d8
and r6, r6, r2
add r6, r1, r6, lsl #7
/* Third state: index from the first word of q12 (masked further down). */
vmov r7, r8, d24
add lr, sp, #128
vldmia lr, {q0-q3}
pld [r6]
pld [r6, #8*4]
pld [r6, #16*4]
pld [r6, #24*4]
vldmia r6, {q8-q15}
and r7, r7, r2
add r7, r3, r7, lsl #7
/* XOR the selected entry into the second state. */
veor.u32 q8, q8, q0
veor.u32 q9, q9, q1
veor.u32 q10, q10, q2
veor.u32 q11, q11, q3
pld [r7]
pld [r7, #8*4]
pld [r7, #16*4]
pld [r7, #24*4]
veor.u32 q12, q12, q4
veor.u32 q13, q13, q5
veor.u32 q14, q14, q6
veor.u32 q15, q15, q7
/* Load the third state's entry, park the second state at sp + 128 and XOR
 * the entry into the third state loaded from sp + 256. */
vldmia r7, {q0-q7}
vstmia lr, {q8-q15}
add r12, sp, #256
vldmia r12, {q8-q15}
veor.u32 q8, q8, q0
veor.u32 q9, q9, q1
veor.u32 q10, q10, q2
veor.u32 q11, q11, q3
veor.u32 q12, q12, q4
veor.u32 q13, q13, q5
veor.u32 q14, q14, q6
veor.u32 q15, q15, q7
/* First half ^= second half for both NEON states; save the salsa inputs. */
vldmia lr, {q0-q7}
veor.u32 q0, q0, q4
veor.u32 q1, q1, q5
veor.u32 q2, q2, q6
veor.u32 q3, q3, q7
vstmia lr, {q0-q3}
veor.u32 q8, q8, q12
veor.u32 q9, q9, q13
veor.u32 q10, q10, q14
veor.u32 q11, q11, q15
vstmia r12, {q8-q15}
salsa8_core_3way
ldr r0, [sp, #4*16+0*4]
mov r12, sp
add r2, r0, #16*4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
scrypt_core_macro2_x4
/* New first halves = saved input + salsa output; store them back. */
add lr, sp, #128
vldmia lr, {q4-q7}
vadd.u32 q4, q4, q0
vadd.u32 q5, q5, q1
vadd.u32 q6, q6, q2
vadd.u32 q7, q7, q3
add r12, sp, #256
vldmia r12, {q12-q15}
vstmia lr, {q4-q7}
vadd.u32 q12, q12, q8
vadd.u32 q13, q13, q9
vadd.u32 q14, q14, q10
vadd.u32 q15, q15, q11
/* Second halves ^= new first halves; these are the next salsa inputs. */
add r4, sp, #128+4*16
vldmia r4, {q0-q3}
vstmia r12, {q12-q15}
veor.u32 q0, q0, q4
veor.u32 q1, q1, q5
veor.u32 q2, q2, q6
veor.u32 q3, q3, q7
add r5, sp, #256+4*16
vldmia r5, {q8-q11}
vstmia r4, {q0-q3}
veor.u32 q8, q8, q12
veor.u32 q9, q9, q13
veor.u32 q10, q10, q14
veor.u32 q11, q11, q15
vmov q12, q8
vmov q13, q9
vmov q14, q10
vmov q15, q11
salsa8_core_3way
ldr r0, [sp, #4*16+0*4]
ldr r3, [sp, #4*16+1*4]
ldr r2, [sp, #4*16+3*4]
mov r1, sp
add r0, r0, #16*4
sub r2, r2, #1
scrypt_core_macro3_x4
/* NOTE(review): r4 is presumably left by the macro above holding the word
 * that selects the next entry; the entry is prefetched here -- confirm
 * against the macro definition. */
and r4, r4, r2
add r3, r3, r4, lsl #7
pld [r3, #16*4]
pld [r3]
pld [r3, #24*4]
pld [r3, #8*4]
scrypt_core_macro3_x6
scrypt_core_macro3_x6
/* Finish the second halves and spill them to sp + 192 / sp + 320. */
add lr, sp, #128
add r4, sp, #128+4*16
vldmia r4, {q4-q7}
vadd.u32 q4, q4, q0
vadd.u32 q5, q5, q1
vadd.u32 q6, q6, q2
vadd.u32 q7, q7, q3
vstmia r4, {q4-q7}
vadd.u32 q12, q12, q8
vadd.u32 q13, q13, q9
vadd.u32 q14, q14, q10
vadd.u32 q15, q15, q11
add r5, sp, #256+4*16
vstmia r5, {q12-q15}
/* Decrement the iteration counter kept in the frame. */
ldr r2, [sp, #4*16+2*4]
subs r2, r2, #1
bne scrypt_core_3way_loop2
/*
 * Epilogue: apply the word permutation to all three states again and write
 * them to the caller's buffer (first state in place, the other two from
 * sp + 128 and sp + 256 to r0 + 128 and r0 + 256).
 */
ldr r0, [sp, #4*16+0*4]
vldmia r0, {q8-q15}
vmov.u64 q0, #0xffffffff
vmov.u32 q1, q8
vmov.u32 q2, q12
vbif.u32 q8, q9, q0
vbif.u32 q12, q13, q0
vbif.u32 q9, q10, q0
vbif.u32 q13, q14, q0
vbif.u32 q10, q11, q0
vbif.u32 q14, q15, q0
vbif.u32 q11, q1, q0
vbif.u32 q15, q2, q0
add r12, sp, #8*16
vldmia r12!, {q0-q7}
vswp.u32 d17, d21
vswp.u32 d25, d29
vswp.u32 d18, d22
vswp.u32 d26, d30
vstmia r0!, {q8-q15}
vmov.u64 q8, #0xffffffff
vmov.u32 q9, q0
vmov.u32 q10, q4
vbif.u32 q0, q1, q8
vbif.u32 q4, q5, q8
vbif.u32 q1, q2, q8
vbif.u32 q5, q6, q8
vbif.u32 q2, q3, q8
vbif.u32 q6, q7, q8
vbif.u32 q3, q9, q8
vbif.u32 q7, q10, q8
vldmia r12, {q8-q15}
vswp.u32 d1, d5
vswp.u32 d9, d13
vswp.u32 d2, d6
vswp.u32 d10, d14
vstmia r0!, {q0-q7}
vmov.u64 q0, #0xffffffff
vmov.u32 q1, q8
vmov.u32 q2, q12
vbif.u32 q8, q9, q0
vbif.u32 q12, q13, q0
vbif.u32 q9, q10, q0
vbif.u32 q13, q14, q0
vbif.u32 q10, q11, q0
vbif.u32 q14, q15, q0
vbif.u32 q11, q1, q0
vbif.u32 q15, q2, q0
vswp.u32 d17, d21
vswp.u32 d25, d29
vswp.u32 d18, d22
vswp.u32 d26, d30
vstmia r0, {q8-q15}
/* Restore the original stack pointer and the saved registers; return. */
ldr sp, [sp, #4*16+4*4]
vpop {q4-q7}
ldmfd sp!, {r4-r11, pc}
#endif /* __ARM_NEON__ */
#endif
07070100000024000081A4000003E800000064000000015EF4BCA100004C75000000000000000000000000000000000000001C00000000cpuminer-2.5.1/scrypt-ppc.S/*
* Copyright 2014-2015 pooler@litecoinpool.org
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#if defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
/*
 * On non-Apple targets, map the symbolic register names used throughout
 * this file (r0-r31, and v0-v31 when AltiVec is enabled) to the bare
 * register numbers.
 */
#ifndef __APPLE__
#define r0 0
#define r1 1
#define r2 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31
#ifdef __ALTIVEC__
#define v0 0
#define v1 1
#define v2 2
#define v3 3
#define v4 4
#define v5 5
#define v6 6
#define v7 7
#define v8 8
#define v9 9
#define v10 10
#define v11 11
#define v12 12
#define v13 13
#define v14 14
#define v15 15
#define v16 16
#define v17 17
#define v18 18
#define v19 19
#define v20 20
#define v21 21
#define v22 22
#define v23 23
#define v24 24
#define v25 25
#define v26 26
#define v27 27
#define v28 28
#define v29 29
#define v30 30
#define v31 31
#endif
#endif
/*
 * When not building for a 64-bit target, replace the doubleword load/store
 * mnemonics with their word-sized counterparts so the same prologue and
 * epilogue code can be used for both word sizes.
 */
#if !(defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || \
defined(__64BIT__) || defined(_LP64) || defined(__LP64__))
#define ld lwz
#define std stw
#define stdu stwu
#define stdux stwux
#endif
#ifdef __ALTIVEC__
#ifdef __APPLE__
.machine ppc7400
#endif
/*
 * salsa8_core_doubleround (AltiVec): one double-round on the state held in
 * v0-v3 (four 32-bit words per vector).
 *
 * v4 is the temporary. v16-v19 supply the per-word rotate counts; the
 * scrypt_core routine in this file sets them to 7, 9, 13 and 18.
 * The vsldoi instructions rotate a vector by 4, 8 or 12 bytes so that the
 * words line up for the next step.
 */
.macro salsa8_core_doubleround
/* First half of the double-round. */
vadduwm v4, v0, v1
vrlw v4, v4, v16
vxor v3, v3, v4
vadduwm v4, v3, v0
vrlw v4, v4, v17
vxor v2, v2, v4
vadduwm v4, v2, v3
vrlw v4, v4, v18
vsldoi v3, v3, v3, 12
vxor v1, v1, v4
vadduwm v4, v1, v2
vrlw v4, v4, v19
vsldoi v1, v1, v1, 4
vxor v0, v0, v4
/* Second half: same pattern with the roles of v1 and v3 exchanged. */
vadduwm v4, v0, v3
vrlw v4, v4, v16
vsldoi v2, v2, v2, 8
vxor v1, v1, v4
vadduwm v4, v1, v0
vrlw v4, v4, v17
vxor v2, v2, v4
vadduwm v4, v2, v1
vrlw v4, v4, v18
vsldoi v1, v1, v1, 12
vxor v3, v3, v4
vadduwm v4, v3, v2
vrlw v4, v4, v19
vsldoi v3, v3, v3, 4
vxor v0, v0, v4
vsldoi v2, v2, v2, 8
.endm
/*
 * salsa8_core (AltiVec): four consecutive double-rounds on v0-v3.
 * The caller is responsible for adding the input back to the result.
 */
.macro salsa8_core
salsa8_core_doubleround
salsa8_core_doubleround
salsa8_core_doubleround
salsa8_core_doubleround
.endm
/*
 * scrypt_core (AltiVec version).
 *
 * Register arguments as used by the code below:
 *   r3 - pointer to the 128-byte state; read at entry, rewritten at exit
 *        (accessed with lvx/stvx)
 *   r4 - scratchpad base; r5 entries of 128 bytes are written and read
 *   r5 - entry count N; N - 1 is used as an index mask
 *        (presumably N is a power of two -- confirm with callers)
 *
 * v8-v11 hold the first 64 bytes of the state and v12-v15 the second.
 * A 16-byte frame is created only to save the VRSAVE register.
 */
#ifdef _AIX
.csect .text[PR]
#else
.text
#endif
.align 2
.globl scrypt_core
.globl _scrypt_core
.globl .scrypt_core
#ifdef __ELF__
.type scrypt_core, %function
#endif
scrypt_core:
_scrypt_core:
.scrypt_core:
stdu r1, -4*4(r1)
/* Save VRSAVE (SPR 256) and set its upper 20 bits, which correspond to
 * v0-v19 under the usual VRSAVE convention. */
mfspr r0, 256
std r0, 2*4(r1)
oris r0, r0, 0xffff
ori r0, r0, 0xf000
mtspr 256, r0
/* r6-r12 = byte offsets 16, 32, ..., 112 used as lvx/stvx indices. */
li r6, 1*16
li r7, 2*16
li r8, 3*16
li r9, 4*16
li r10, 5*16
li r11, 6*16
li r12, 7*16
lvx v8, 0, r3
lvx v9, r3, r6
lvx v10, r3, r7
lvx v11, r3, r8
lvx v12, r3, r9
lvx v13, r3, r10
lvx v14, r3, r11
lvx v15, r3, r12
/* Build two select masks without touching memory:
 *   v3 = all-ones in words 1 and 3, v1 = all-ones in words 2 and 3. */
vxor v0, v0, v0
vnor v1, v0, v0
vsldoi v2, v0, v1, 4
vsldoi v3, v2, v0, 8
vor v3, v3, v2
vsldoi v1, v0, v1, 8
/* Permute the words of the first 64 bytes with vsel ... */
vor v4, v8, v8
vsel v8, v8, v9, v3
vsel v9, v9, v10, v3
vsel v10, v10, v11, v3
vsel v11, v11, v4, v3
vor v4, v8, v8
vor v5, v9, v9
vsel v8, v8, v10, v1
vsel v9, v11, v9, v1
vsel v10, v10, v4, v1
vsel v11, v5, v11, v1
/* ... and the same for the second 64 bytes. */
vor v4, v12, v12
vsel v12, v12, v13, v3
vsel v13, v13, v14, v3
vsel v14, v14, v15, v3
vsel v15, v15, v4, v3
vor v4, v12, v12
vor v5, v13, v13
vsel v12, v12, v14, v1
vsel v13, v15, v13, v1
vsel v14, v14, v4, v1
vsel v15, v5, v15, v1
/* Rotate counts for salsa8_core: 7, 9, 13 and 9 + 9 = 18. */
vspltisw v16, 7
vspltisw v17, 9
vspltisw v18, 13
vadduwm v19, v17, v17
mtctr r5
/*
 * First loop (N iterations): store an entry, then mix the state.
 * Each entry holds (first half XOR second half) in its first 64 bytes and
 * the unmodified second half in its last 64 bytes.
 */
scrypt_core_loop1:
vxor v8, v8, v12
stvx v8, 0, r4
vxor v9, v9, v13
stvx v9, r4, r6
vxor v10, v10, v14
stvx v10, r4, r7
vxor v11, v11, v15
stvx v11, r4, r8
vor v0, v8, v8
stvx v12, r4, r9
vor v1, v9, v9
stvx v13, r4, r10
vor v2, v10, v10
stvx v14, r4, r11
vor v3, v11, v11
stvx v15, r4, r12
salsa8_core
/* New first half = salsa input + salsa output. */
vadduwm v8, v8, v0
vadduwm v9, v9, v1
vadduwm v10, v10, v2
vadduwm v11, v11, v3
/* Second half ^= new first half, then mix it the same way. */
vxor v12, v12, v8
vxor v13, v13, v9
vxor v14, v14, v10
vxor v15, v15, v11
vor v0, v12, v12
vor v1, v13, v13
vor v2, v14, v14
vor v3, v15, v15
salsa8_core
vadduwm v12, v12, v0
vadduwm v13, v13, v1
vadduwm v14, v14, v2
vadduwm v15, v15, v3
addi r4, r4, 32*4
bdnz scrypt_core_loop1
/* Spill v12 to the state buffer so its first word can be read with lwz as
 * the entry index in the second loop. */
stvx v12, 0, r3
/* Rewind r4 to the scratchpad base; ctr = N; r5 = N - 1 (index mask). */
slwi r6, r5, 7
subf r4, r6, r4
mtctr r5
addi r5, r5, -1
/* r7-r9 now hold r4 + 16/32/48 so that r6 can serve as the entry offset. */
addi r7, r4, 1*16
addi r8, r4, 2*16
addi r9, r4, 3*16
/*
 * Second loop (N iterations): pick the entry selected by the first word of
 * v12 and XOR it into the state while mixing.
 */
scrypt_core_loop2:
lwz r6, 0(r3)
and r6, r6, r5
slwi r6, r6, 7
/* Load the entry's first 64 bytes while forming first half ^ second half. */
lvx v0, r4, r6
vxor v8, v8, v12
lvx v1, r7, r6
vxor v9, v9, v13
lvx v2, r8, r6
vxor v10, v10, v14
lvx v3, r9, r6
vxor v11, v11, v15
vxor v0, v0, v8
vxor v1, v1, v9
vxor v2, v2, v10
vxor v3, v3, v11
/* Advance to the entry's second 64 bytes; loads are interleaved with the
 * copies that save the salsa input in v8-v11. */
addi r6, r6, 64
vor v8, v0, v0
vor v9, v1, v1
lvx v5, r4, r6
vor v10, v2, v2
lvx v6, r7, r6
vor v11, v3, v3
lvx v7, r8, r6
salsa8_core
vadduwm v8, v8, v0
/* v0 is free again once v8 has been updated: reuse it for the last load. */
lvx v0, r9, r6
vadduwm v9, v9, v1
vadduwm v10, v10, v2
vadduwm v11, v11, v3
/* Second half ^= entry's second half ^ new first half. */
vxor v12, v12, v5
vxor v13, v13, v6
vxor v14, v14, v7
vxor v15, v15, v0
vxor v12, v12, v8
vxor v13, v13, v9
vxor v14, v14, v10
vxor v15, v15, v11
vor v0, v12, v12
vor v1, v13, v13
vor v2, v14, v14
vor v3, v15, v15
salsa8_core
vadduwm v12, v12, v0
/* Publish v12 for the index load at the top of the next iteration. */
stvx v12, 0, r3
vadduwm v13, v13, v1
vadduwm v14, v14, v2
vadduwm v15, v15, v3
bdnz scrypt_core_loop2
/* Rebuild the select masks (v0-v3 were clobbered by salsa8_core) and apply
 * the word permutation again before writing the state back. */
vxor v0, v0, v0
vnor v1, v0, v0
vsldoi v2, v0, v1, 4
vsldoi v3, v2, v0, 8
vor v3, v3, v2
vsldoi v1, v0, v1, 8
vor v4, v8, v8
vsel v8, v8, v9, v3
vsel v9, v9, v10, v3
vsel v10, v10, v11, v3
vsel v11, v11, v4, v3
vor v4, v8, v8
vor v5, v9, v9
vsel v8, v8, v10, v1
vsel v9, v11, v9, v1
vsel v10, v10, v4, v1
vsel v11, v5, v11, v1
vor v4, v12, v12
vsel v12, v12, v13, v3
vsel v13, v13, v14, v3
vsel v14, v14, v15, v3
vsel v15, v15, v4, v3
vor v4, v12, v12
vor v5, v13, v13
vsel v12, v12, v14, v1
vsel v13, v15, v13, v1
vsel v14, v14, v4, v1
vsel v15, v5, v15, v1
/* r6-r9 were reused in the second loop, so reload their offsets; r10-r12
 * still hold 80, 96 and 112 from the prologue. */
li r6, 1*16
li r7, 2*16
li r8, 3*16
li r9, 4*16
stvx v8, 0, r3
stvx v9, r3, r6
stvx v10, r3, r7
stvx v11, r3, r8
stvx v12, r3, r9
stvx v13, r3, r10
stvx v14, r3, r11
stvx v15, r3, r12
/* Restore VRSAVE, pop the frame and return. */
ld r0, 2*4(r1)
mtspr 256, r0
addi r1, r1, 4*4
blr
#else /* __ALTIVEC__ */
/*
 * salsa8_core_doubleround (integer version): one double-round on the
 * 16-word state held in r16-r31.
 *
 * r0, r5, r6 and r7 are temporaries. Every step processes four independent
 * add / rotate-left / xor chains side by side; the rotate amounts are
 * 7, 9, 13 and 18.
 */
.macro salsa8_core_doubleround
/* First half of the double-round. */
add r0, r16, r28
add r5, r21, r17
add r6, r26, r22
add r7, r31, r27
rotlwi r0, r0, 7
rotlwi r5, r5, 7
rotlwi r6, r6, 7
rotlwi r7, r7, 7
xor r20, r20, r0
xor r25, r25, r5
xor r30, r30, r6
xor r19, r19, r7
add r0, r20, r16
add r5, r25, r21
add r6, r30, r26
add r7, r19, r31
rotlwi r0, r0, 9
rotlwi r5, r5, 9
rotlwi r6, r6, 9
rotlwi r7, r7, 9
xor r24, r24, r0
xor r29, r29, r5
xor r18, r18, r6
xor r23, r23, r7
add r0, r24, r20
add r5, r29, r25
add r6, r18, r30
add r7, r23, r19
rotlwi r0, r0, 13
rotlwi r5, r5, 13
rotlwi r6, r6, 13
rotlwi r7, r7, 13
xor r28, r28, r0
xor r17, r17, r5
xor r22, r22, r6
xor r27, r27, r7
add r0, r28, r24
add r5, r17, r29
add r6, r22, r18
add r7, r27, r23
rotlwi r0, r0, 18
rotlwi r5, r5, 18
rotlwi r6, r6, 18
rotlwi r7, r7, 18
xor r16, r16, r0
xor r21, r21, r5
xor r26, r26, r6
xor r31, r31, r7
/* Second half of the double-round (different word groupings). */
add r0, r16, r19
add r5, r21, r20
add r6, r26, r25
add r7, r31, r30
rotlwi r0, r0, 7
rotlwi r5, r5, 7
rotlwi r6, r6, 7
rotlwi r7, r7, 7
xor r17, r17, r0
xor r22, r22, r5
xor r27, r27, r6
xor r28, r28, r7
add r0, r17, r16
add r5, r22, r21
add r6, r27, r26
add r7, r28, r31
rotlwi r0, r0, 9
rotlwi r5, r5, 9
rotlwi r6, r6, 9
rotlwi r7, r7, 9
xor r18, r18, r0
xor r23, r23, r5
xor r24, r24, r6
xor r29, r29, r7
add r0, r18, r17
add r5, r23, r22
add r6, r24, r27
add r7, r29, r28
rotlwi r0, r0, 13
rotlwi r5, r5, 13
rotlwi r6, r6, 13
rotlwi r7, r7, 13
xor r19, r19, r0
xor r20, r20, r5
xor r25, r25, r6
xor r30, r30, r7
add r0, r19, r18
add r5, r20, r23
add r6, r25, r24
add r7, r30, r29
rotlwi r0, r0, 18
rotlwi r5, r5, 18
rotlwi r6, r6, 18
rotlwi r7, r7, 18
xor r16, r16, r0
xor r21, r21, r5
xor r26, r26, r6
xor r31, r31, r7
.endm
/*
 * salsa8_core (integer version): four consecutive double-rounds on r16-r31.
 * Clobbers r0, r5, r6 and r7; the caller adds the input back afterwards.
 */
.macro salsa8_core
salsa8_core_doubleround
salsa8_core_doubleround
salsa8_core_doubleround
salsa8_core_doubleround
.endm
/*
 * scrypt_core (integer-only PowerPC version).
 *
 * Register arguments as used by the code below:
 *   r3 - pointer to the 32-word state; read at entry, rewritten at exit
 *   r4 - scratchpad base; r5 entries of 32 words are written and read
 *   r5 - entry count N; N - 1 is used as an index mask
 *        (presumably N is a power of two -- confirm with callers)
 *
 * Frame layout (68 words, offsets in words from r1):
 *    2        N during the first loop, N - 1 afterwards
 *    4 .. 21  saved r13-r21 (two words each)
 *   22        saved r3
 *   24 .. 39  state words 0-15
 *   40 .. 47  state words 16-23
 *   48 .. 67  saved r22-r31 (two words each)
 * State words 24-31 live in r8-r15 for the whole routine.
 */
#ifdef _AIX
.csect .text[PR]
#else
.text
#endif
.align 2
.globl scrypt_core
.globl _scrypt_core
.globl .scrypt_core
#ifdef __ELF__
.type scrypt_core, %function
#endif
scrypt_core:
_scrypt_core:
.scrypt_core:
/* Prologue: allocate the frame and save N, r13-r31 and r3. */
stdu r1, -68*4(r1)
stw r5, 2*4(r1)
std r13, 4*4(r1)
std r14, 6*4(r1)
std r15, 8*4(r1)
std r16, 10*4(r1)
std r17, 12*4(r1)
std r18, 14*4(r1)
std r19, 16*4(r1)
std r20, 18*4(r1)
std r21, 20*4(r1)
std r3, 22*4(r1)
std r22, 48*4(r1)
std r23, 50*4(r1)
std r24, 52*4(r1)
std r25, 54*4(r1)
std r26, 56*4(r1)
std r27, 58*4(r1)
std r28, 60*4(r1)
std r29, 62*4(r1)
std r30, 64*4(r1)
std r31, 66*4(r1)
/* Copy state words 0-23 into the frame, eight at a time. */
lwz r16, 0*4(r3)
lwz r17, 1*4(r3)
lwz r18, 2*4(r3)
lwz r19, 3*4(r3)
lwz r20, 4*4(r3)
lwz r21, 5*4(r3)
lwz r22, 6*4(r3)
lwz r23, 7*4(r3)
stw r16, 24*4(r1)
stw r17, 25*4(r1)
stw r18, 26*4(r1)
stw r19, 27*4(r1)
stw r20, 28*4(r1)
stw r21, 29*4(r1)
stw r22, 30*4(r1)
stw r23, 31*4(r1)
lwz r24, 8*4(r3)
lwz r25, 9*4(r3)
lwz r26, 10*4(r3)
lwz r27, 11*4(r3)
lwz r28, 12*4(r3)
lwz r29, 13*4(r3)
lwz r30, 14*4(r3)
lwz r31, 15*4(r3)
stw r24, 32*4(r1)
stw r25, 33*4(r1)
stw r26, 34*4(r1)
stw r27, 35*4(r1)
stw r28, 36*4(r1)
stw r29, 37*4(r1)
stw r30, 38*4(r1)
stw r31, 39*4(r1)
lwz r16, 16*4(r3)
lwz r17, 17*4(r3)
lwz r18, 18*4(r3)
lwz r19, 19*4(r3)
lwz r20, 20*4(r3)
lwz r21, 21*4(r3)
lwz r22, 22*4(r3)
lwz r23, 23*4(r3)
stw r16, 40*4(r1)
stw r17, 41*4(r1)
stw r18, 42*4(r1)
stw r19, 43*4(r1)
stw r20, 44*4(r1)
stw r21, 45*4(r1)
stw r22, 46*4(r1)
stw r23, 47*4(r1)
/* State words 24-31 stay in r8-r15. */
lwz r8, 24*4(r3)
lwz r9, 25*4(r3)
lwz r10, 26*4(r3)
lwz r11, 27*4(r3)
lwz r12, 28*4(r3)
lwz r13, 29*4(r3)
lwz r14, 30*4(r3)
lwz r15, 31*4(r3)
mtctr r5
/*
 * First loop (N iterations): store an entry, then mix the state.
 * Each entry holds (words 0-15 XOR words 16-31) in its first 16 words and
 * the unmodified words 16-31 in its last 16 words.
 */
scrypt_core_loop1:
/* Load state words 0-15 into r16-r31. */
lwz r16, 24*4(r1)
lwz r17, 25*4(r1)
lwz r18, 26*4(r1)
lwz r19, 27*4(r1)
lwz r20, 28*4(r1)
lwz r21, 29*4(r1)
lwz r22, 30*4(r1)
lwz r23, 31*4(r1)
lwz r24, 32*4(r1)
lwz r25, 33*4(r1)
lwz r26, 34*4(r1)
lwz r27, 35*4(r1)
lwz r28, 36*4(r1)
lwz r29, 37*4(r1)
lwz r30, 38*4(r1)
lwz r31, 39*4(r1)
/* XOR in words 16-31 and write both parts of the entry. */
lwz r0, 40*4(r1)
lwz r5, 41*4(r1)
lwz r6, 42*4(r1)
lwz r7, 43*4(r1)
xor r16, r16, r0
xor r17, r17, r5
xor r18, r18, r6
xor r19, r19, r7
stw r16, 0*4(r4)
stw r17, 1*4(r4)
stw r18, 2*4(r4)
stw r19, 3*4(r4)
stw r0, 16*4(r4)
stw r5, 17*4(r4)
stw r6, 18*4(r4)
stw r7, 19*4(r4)
lwz r0, 44*4(r1)
lwz r5, 45*4(r1)
lwz r6, 46*4(r1)
lwz r7, 47*4(r1)
xor r20, r20, r0
xor r21, r21, r5
xor r22, r22, r6
xor r23, r23, r7
stw r0, 20*4(r4)
stw r5, 21*4(r4)
stw r6, 22*4(r4)
stw r7, 23*4(r4)
stw r20, 4*4(r4)
stw r21, 5*4(r4)
stw r22, 6*4(r4)
stw r23, 7*4(r4)
xor r24, r24, r8
xor r25, r25, r9
xor r26, r26, r10
xor r27, r27, r11
xor r28, r28, r12
xor r29, r29, r13
xor r30, r30, r14
xor r31, r31, r15
stw r24, 8*4(r4)
stw r25, 9*4(r4)
stw r26, 10*4(r4)
stw r27, 11*4(r4)
stw r28, 12*4(r4)
stw r29, 13*4(r4)
stw r30, 14*4(r4)
stw r31, 15*4(r4)
stw r8, 24*4(r4)
stw r9, 25*4(r4)
stw r10, 26*4(r4)
stw r11, 27*4(r4)
stw r12, 28*4(r4)
stw r13, 29*4(r4)
stw r14, 30*4(r4)
stw r15, 31*4(r4)
salsa8_core
/* Add the salsa input (just stored in the entry's first 16 words) to the
 * output and save the result as the new state words 0-15. */
lwz r0, 0*4(r4)
lwz r5, 1*4(r4)
lwz r6, 2*4(r4)
lwz r7, 3*4(r4)
add r16, r16, r0
add r17, r17, r5
add r18, r18, r6
add r19, r19, r7
lwz r0, 4*4(r4)
lwz r5, 5*4(r4)
lwz r6, 6*4(r4)
lwz r7, 7*4(r4)
add r20, r20, r0
add r21, r21, r5
add r22, r22, r6
add r23, r23, r7
lwz r0, 8*4(r4)
lwz r5, 9*4(r4)
lwz r6, 10*4(r4)
lwz r7, 11*4(r4)
add r24, r24, r0
add r25, r25, r5
add r26, r26, r6
add r27, r27, r7
lwz r0, 12*4(r4)
lwz r5, 13*4(r4)
lwz r6, 14*4(r4)
lwz r7, 15*4(r4)
add r28, r28, r0
add r29, r29, r5
add r30, r30, r6
add r31, r31, r7
stw r16, 24*4(r1)
stw r17, 25*4(r1)
stw r18, 26*4(r1)
stw r19, 27*4(r1)
stw r20, 28*4(r1)
stw r21, 29*4(r1)
stw r22, 30*4(r1)
stw r23, 31*4(r1)
stw r24, 32*4(r1)
stw r25, 33*4(r1)
stw r26, 34*4(r1)
stw r27, 35*4(r1)
stw r28, 36*4(r1)
stw r29, 37*4(r1)
stw r30, 38*4(r1)
stw r31, 39*4(r1)
/* XOR with words 16-31 to form the second salsa input; keep a copy of that
 * input in frame words 40-47 and in r8-r15. */
lwz r0, 40*4(r1)
lwz r5, 41*4(r1)
lwz r6, 42*4(r1)
lwz r7, 43*4(r1)
xor r16, r16, r0
xor r17, r17, r5
xor r18, r18, r6
xor r19, r19, r7
lwz r0, 44*4(r1)
lwz r5, 45*4(r1)
lwz r6, 46*4(r1)
lwz r7, 47*4(r1)
xor r20, r20, r0
xor r21, r21, r5
xor r22, r22, r6
xor r23, r23, r7
xor r24, r24, r8
xor r25, r25, r9
xor r26, r26, r10
xor r27, r27, r11
xor r28, r28, r12
xor r29, r29, r13
xor r30, r30, r14
xor r31, r31, r15
stw r16, 40*4(r1)
stw r17, 41*4(r1)
stw r18, 42*4(r1)
stw r19, 43*4(r1)
mr r8, r24
mr r9, r25
mr r10, r26
mr r11, r27
stw r20, 44*4(r1)
stw r21, 45*4(r1)
stw r22, 46*4(r1)
stw r23, 47*4(r1)
mr r12, r28
mr r13, r29
mr r14, r30
mr r15, r31
salsa8_core
/* Add the saved input to the output: new state words 16-31. */
lwz r0, 40*4(r1)
lwz r5, 41*4(r1)
lwz r6, 42*4(r1)
lwz r7, 43*4(r1)
add r16, r16, r0
add r17, r17, r5
add r18, r18, r6
add r19, r19, r7
lwz r0, 44*4(r1)
lwz r5, 45*4(r1)
lwz r6, 46*4(r1)
lwz r7, 47*4(r1)
add r20, r20, r0
add r21, r21, r5
add r22, r22, r6
add r23, r23, r7
add r8, r8, r24
add r9, r9, r25
add r10, r10, r26
add r11, r11, r27
stw r16, 40*4(r1)
stw r17, 41*4(r1)
stw r18, 42*4(r1)
stw r19, 43*4(r1)
add r12, r12, r28
add r13, r13, r29
add r14, r14, r30
add r15, r15, r31
stw r20, 44*4(r1)
stw r21, 45*4(r1)
stw r22, 46*4(r1)
stw r23, 47*4(r1)
addi r4, r4, 32*4
bdnz scrypt_core_loop1
/* Rewind r4 to the scratchpad base, reload the counter with N and replace
 * the saved N with the index mask N - 1. */
lwz r5, 2*4(r1)
slwi r3, r5, 7
subf r4, r3, r4
mtctr r5
addi r5, r5, -1
stw r5, 2*4(r1)
/*
 * Second loop (N iterations). On entry to every iteration r16-r19 still
 * hold state words 16-19 and r5 holds N - 1.
 */
scrypt_core_loop2:
/* r3 = address of the entry selected by state word 16. */
and r3, r16, r5
slwi r3, r3, 7
add r3, r3, r4
/* Keep words 16-19 in the temporaries before r16-r19 are reloaded. */
mr r0, r16
mr r5, r17
mr r6, r18
mr r7, r19
lwz r16, 24*4(r1)
lwz r17, 25*4(r1)
lwz r18, 26*4(r1)
lwz r19, 27*4(r1)
lwz r20, 28*4(r1)
lwz r21, 29*4(r1)
lwz r22, 30*4(r1)
lwz r23, 31*4(r1)
lwz r24, 32*4(r1)
lwz r25, 33*4(r1)
lwz r26, 34*4(r1)
lwz r27, 35*4(r1)
lwz r28, 36*4(r1)
lwz r29, 37*4(r1)
lwz r30, 38*4(r1)
lwz r31, 39*4(r1)
/* words 0-15 ^= words 16-31 ... */
xor r16, r16, r0
xor r17, r17, r5
xor r18, r18, r6
xor r19, r19, r7
lwz r0, 44*4(r1)
lwz r5, 45*4(r1)
lwz r6, 46*4(r1)
lwz r7, 47*4(r1)
xor r20, r20, r0
xor r21, r21, r5
xor r22, r22, r6
xor r23, r23, r7
xor r24, r24, r8
xor r25, r25, r9
xor r26, r26, r10
xor r27, r27, r11
xor r28, r28, r12
xor r29, r29, r13
xor r30, r30, r14
xor r31, r31, r15
/* ... ^= first 16 words of the selected entry. */
lwz r0, 0*4(r3)
lwz r5, 1*4(r3)
lwz r6, 2*4(r3)
lwz r7, 3*4(r3)
xor r16, r16, r0
xor r17, r17, r5
xor r18, r18, r6
xor r19, r19, r7
lwz r0, 4*4(r3)
lwz r5, 5*4(r3)
lwz r6, 6*4(r3)
lwz r7, 7*4(r3)
xor r20, r20, r0
xor r21, r21, r5
xor r22, r22, r6
xor r23, r23, r7
lwz r0, 8*4(r3)
lwz r5, 9*4(r3)
lwz r6, 10*4(r3)
lwz r7, 11*4(r3)
xor r24, r24, r0
xor r25, r25, r5
xor r26, r26, r6
xor r27, r27, r7
lwz r0, 12*4(r3)
lwz r5, 13*4(r3)
lwz r6, 14*4(r3)
lwz r7, 15*4(r3)
xor r28, r28, r0
xor r29, r29, r5
xor r30, r30, r6
xor r31, r31, r7
/* Save the salsa input in frame words 24-39. */
stw r16, 24*4(r1)
stw r17, 25*4(r1)
stw r18, 26*4(r1)
stw r19, 27*4(r1)
stw r20, 28*4(r1)
stw r21, 29*4(r1)
stw r22, 30*4(r1)
stw r23, 31*4(r1)
stw r24, 32*4(r1)
stw r25, 33*4(r1)
stw r26, 34*4(r1)
stw r27, 35*4(r1)
stw r28, 36*4(r1)
stw r29, 37*4(r1)
stw r30, 38*4(r1)
stw r31, 39*4(r1)
salsa8_core
/* Add the saved input: new state words 0-15, stored back to the frame. */
lwz r0, 24*4(r1)
lwz r5, 25*4(r1)
lwz r6, 26*4(r1)
lwz r7, 27*4(r1)
add r16, r16, r0
add r17, r17, r5
add r18, r18, r6
add r19, r19, r7
lwz r0, 28*4(r1)
lwz r5, 29*4(r1)
lwz r6, 30*4(r1)
lwz r7, 31*4(r1)
add r20, r20, r0
add r21, r21, r5
add r22, r22, r6
add r23, r23, r7
lwz r0, 32*4(r1)
lwz r5, 33*4(r1)
lwz r6, 34*4(r1)
lwz r7, 35*4(r1)
add r24, r24, r0
add r25, r25, r5
add r26, r26, r6
add r27, r27, r7
lwz r0, 36*4(r1)
lwz r5, 37*4(r1)
lwz r6, 38*4(r1)
lwz r7, 39*4(r1)
add r28, r28, r0
add r29, r29, r5
add r30, r30, r6
add r31, r31, r7
stw r16, 24*4(r1)
stw r17, 25*4(r1)
stw r18, 26*4(r1)
stw r19, 27*4(r1)
stw r20, 28*4(r1)
stw r21, 29*4(r1)
stw r22, 30*4(r1)
stw r23, 31*4(r1)
stw r24, 32*4(r1)
stw r25, 33*4(r1)
stw r26, 34*4(r1)
stw r27, 35*4(r1)
stw r28, 36*4(r1)
stw r29, 37*4(r1)
stw r30, 38*4(r1)
stw r31, 39*4(r1)
/* XOR in the last 16 words of the selected entry ... */
lwz r0, 16*4(r3)
lwz r5, 17*4(r3)
lwz r6, 18*4(r3)
lwz r7, 19*4(r3)
xor r16, r16, r0
xor r17, r17, r5
xor r18, r18, r6
xor r19, r19, r7
lwz r0, 20*4(r3)
lwz r5, 21*4(r3)
lwz r6, 22*4(r3)
lwz r7, 23*4(r3)
xor r20, r20, r0
xor r21, r21, r5
xor r22, r22, r6
xor r23, r23, r7
lwz r0, 24*4(r3)
lwz r5, 25*4(r3)
lwz r6, 26*4(r3)
lwz r7, 27*4(r3)
xor r24, r24, r0
xor r25, r25, r5
xor r26, r26, r6
xor r27, r27, r7
lwz r0, 28*4(r3)
lwz r5, 29*4(r3)
lwz r6, 30*4(r3)
lwz r7, 31*4(r3)
xor r28, r28, r0
xor r29, r29, r5
xor r30, r30, r6
xor r31, r31, r7
/* ... and the current state words 16-31. */
lwz r0, 40*4(r1)
lwz r5, 41*4(r1)
lwz r6, 42*4(r1)
lwz r7, 43*4(r1)
xor r16, r16, r0
xor r17, r17, r5
xor r18, r18, r6
xor r19, r19, r7
lwz r0, 44*4(r1)
lwz r5, 45*4(r1)
lwz r6, 46*4(r1)
lwz r7, 47*4(r1)
xor r20, r20, r0
xor r21, r21, r5
xor r22, r22, r6
xor r23, r23, r7
xor r24, r24, r8
xor r25, r25, r9
xor r26, r26, r10
xor r27, r27, r11
xor r28, r28, r12
xor r29, r29, r13
xor r30, r30, r14
xor r31, r31, r15
/* Save the second salsa input in frame words 40-47 and in r8-r15. */
stw r16, 40*4(r1)
stw r17, 41*4(r1)
stw r18, 42*4(r1)
stw r19, 43*4(r1)
mr r8, r24
mr r9, r25
mr r10, r26
mr r11, r27
stw r20, 44*4(r1)
stw r21, 45*4(r1)
stw r22, 46*4(r1)
stw r23, 47*4(r1)
mr r12, r28
mr r13, r29
mr r14, r30
mr r15, r31
salsa8_core
/* Add the saved input: new state words 16-31. */
lwz r0, 40*4(r1)
lwz r5, 41*4(r1)
lwz r6, 42*4(r1)
lwz r7, 43*4(r1)
add r16, r16, r0
add r17, r17, r5
add r18, r18, r6
add r19, r19, r7
lwz r0, 44*4(r1)
lwz r5, 45*4(r1)
lwz r6, 46*4(r1)
lwz r7, 47*4(r1)
add r20, r20, r0
add r21, r21, r5
add r22, r22, r6
add r23, r23, r7
/* Reload the index mask N - 1 for the next iteration (r5 was a temp). */
lwz r5, 2*4(r1)
add r8, r8, r24
add r9, r9, r25
add r10, r10, r26
add r11, r11, r27
add r12, r12, r28
add r13, r13, r29
add r14, r14, r30
add r15, r15, r31
stw r16, 40*4(r1)
stw r17, 41*4(r1)
stw r18, 42*4(r1)
stw r19, 43*4(r1)
stw r20, 44*4(r1)
stw r21, 45*4(r1)
stw r22, 46*4(r1)
stw r23, 47*4(r1)
bdnz scrypt_core_loop2
/* Write the final state back to the caller's buffer: words 0-23 from the
 * frame, words 24-31 from r8-r15. */
ld r3, 22*4(r1)
lwz r16, 24*4(r1)
lwz r17, 25*4(r1)
lwz r18, 26*4(r1)
lwz r19, 27*4(r1)
lwz r20, 28*4(r1)
lwz r21, 29*4(r1)
lwz r22, 30*4(r1)
lwz r23, 31*4(r1)
stw r16, 0*4(r3)
stw r17, 1*4(r3)
stw r18, 2*4(r3)
stw r19, 3*4(r3)
stw r20, 4*4(r3)
stw r21, 5*4(r3)
stw r22, 6*4(r3)
stw r23, 7*4(r3)
lwz r24, 32*4(r1)
lwz r25, 33*4(r1)
lwz r26, 34*4(r1)
lwz r27, 35*4(r1)
lwz r28, 36*4(r1)
lwz r29, 37*4(r1)
lwz r30, 38*4(r1)
lwz r31, 39*4(r1)
stw r24, 8*4(r3)
stw r25, 9*4(r3)
stw r26, 10*4(r3)
stw r27, 11*4(r3)
stw r28, 12*4(r3)
stw r29, 13*4(r3)
stw r30, 14*4(r3)
stw r31, 15*4(r3)
lwz r16, 40*4(r1)
lwz r17, 41*4(r1)
lwz r18, 42*4(r1)
lwz r19, 43*4(r1)
lwz r20, 44*4(r1)
lwz r21, 45*4(r1)
lwz r22, 46*4(r1)
lwz r23, 47*4(r1)
stw r16, 16*4(r3)
stw r17, 17*4(r3)
stw r18, 18*4(r3)
stw r19, 19*4(r3)
stw r20, 20*4(r3)
stw r21, 21*4(r3)
stw r22, 22*4(r3)
stw r23, 23*4(r3)
stw r8, 24*4(r3)
stw r9, 25*4(r3)
stw r10, 26*4(r3)
stw r11, 27*4(r3)
stw r12, 28*4(r3)
stw r13, 29*4(r3)
stw r14, 30*4(r3)
stw r15, 31*4(r3)
/* Epilogue: restore r13-r31, pop the frame and return. */
ld r13, 4*4(r1)
ld r14, 6*4(r1)
ld r15, 8*4(r1)
ld r16, 10*4(r1)
ld r17, 12*4(r1)
ld r18, 14*4(r1)
ld r19, 16*4(r1)
ld r20, 18*4(r1)
ld r21, 20*4(r1)
ld r22, 48*4(r1)
ld r23, 50*4(r1)
ld r24, 52*4(r1)
ld r25, 54*4(r1)
ld r26, 56*4(r1)
ld r27, 58*4(r1)
ld r28, 60*4(r1)
ld r29, 62*4(r1)
ld r30, 64*4(r1)
ld r31, 66*4(r1)
addi r1, r1, 68*4
blr
#endif /* __ALTIVEC__ */
#endif
07070100000025000081A4000003E800000064000000015EF4BCA1000116C0000000000000000000000000000000000000001C00000000cpuminer-2.5.1/scrypt-x64.S/*
* Copyright 2011-2014 pooler@litecoinpool.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "cpuminer-config.h"
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#if defined(USE_ASM) && defined(__x86_64__)
/*
 * scrypt_best_throughput: choose a value based on CPUID and return it in
 * %eax. Takes no arguments.
 *
 *   6 - only when built with USE_AVX2 and AVX, OSXSAVE, AVX2 and OS support
 *       for XMM/YMM state are all reported
 *   1 - the AMD and Intel models singled out below
 *   3 - everything else
 *
 * NOTE(review): the value is presumably the number of hashes to process in
 * parallel -- confirm with the caller.
 *
 * %rbx is saved and restored because cpuid overwrites it; %rcx and %rdx
 * are clobbered.
 */
.text
.p2align 6
.globl scrypt_best_throughput
.globl _scrypt_best_throughput
scrypt_best_throughput:
_scrypt_best_throughput:
pushq %rbx
#if defined(USE_AVX2)
/* Check for AVX and OSXSAVE support */
movl $1, %eax
cpuid
andl $0x18000000, %ecx
cmpl $0x18000000, %ecx
jne scrypt_best_throughput_no_avx2
/* Check for AVX2 support */
movl $7, %eax
xorl %ecx, %ecx
cpuid
andl $0x00000020, %ebx
cmpl $0x00000020, %ebx
jne scrypt_best_throughput_no_avx2
/* Check for XMM and YMM state support */
xorl %ecx, %ecx
xgetbv
andl $0x00000006, %eax
cmpl $0x00000006, %eax
jne scrypt_best_throughput_no_avx2
movl $6, %eax
jmp scrypt_best_throughput_exit
scrypt_best_throughput_no_avx2:
#endif
/* Check for AuthenticAMD */
xorq %rax, %rax
cpuid
/* Default result; kept on every path that jumps straight to the exit. */
movl $3, %eax
cmpl $0x444d4163, %ecx
jne scrypt_best_throughput_not_amd
cmpl $0x69746e65, %edx
jne scrypt_best_throughput_not_amd
cmpl $0x68747541, %ebx
jne scrypt_best_throughput_not_amd
/* Check for AMD K8 or Bobcat */
movl $1, %eax
cpuid
/* Extended family field (bits 20-27): 0 or 5 selects the value 1. */
andl $0x0ff00000, %eax
jz scrypt_best_throughput_one
cmpl $0x00500000, %eax
je scrypt_best_throughput_one
movl $3, %eax
jmp scrypt_best_throughput_exit
scrypt_best_throughput_not_amd:
/* Check for GenuineIntel */
cmpl $0x6c65746e, %ecx
jne scrypt_best_throughput_exit
cmpl $0x49656e69, %edx
jne scrypt_best_throughput_exit
cmpl $0x756e6547, %ebx
jne scrypt_best_throughput_exit
/* Check for Intel Atom */
movl $1, %eax
cpuid
movl %eax, %edx
/* Require extended family 0 and family 6; movl leaves the flags intact,
 * so the jnz below still tests the result of this compare. */
andl $0x0ff00f00, %eax
cmpl $0x00000600, %eax
movl $3, %eax
jnz scrypt_best_throughput_exit
/* Extended model + model fields: 0x1C, 0x26 and 0x36 select the value 1. */
andl $0x000f00f0, %edx
cmpl $0x000100c0, %edx
je scrypt_best_throughput_one
cmpl $0x00020060, %edx
je scrypt_best_throughput_one
cmpl $0x00030060, %edx
jne scrypt_best_throughput_exit
scrypt_best_throughput_one:
movl $1, %eax
scrypt_best_throughput_exit:
popq %rbx
ret
/*
 * scrypt_shuffle src, so, dest, do
 *
 * Copy sixteen 32-bit words from \so(\src) to \do(\dest), applying a fixed
 * word permutation:
 *   words 0, 6, 8 and 14 keep their position;
 *   1<->5, 2<->10, 3<->15, 4<->12, 7<->11 and 9<->13 are exchanged.
 * Applying the macro twice therefore restores the original order.
 *
 * Each group of four loads reads exactly the slots that the following four
 * stores write, so source and destination may be the same buffer.
 * Clobbers %eax, %ebx, %ecx and %edx.
 */
.macro scrypt_shuffle src, so, dest, do
/* Words 3, 7, 11, 15 (reversed). */
movl \so+60(\src), %eax
movl \so+44(\src), %ebx
movl \so+28(\src), %ecx
movl \so+12(\src), %edx
movl %eax, \do+12(\dest)
movl %ebx, \do+28(\dest)
movl %ecx, \do+44(\dest)
movl %edx, \do+60(\dest)
/* Words 2<->10 and 4<->12. */
movl \so+40(\src), %eax
movl \so+8(\src), %ebx
movl \so+48(\src), %ecx
movl \so+16(\src), %edx
movl %eax, \do+8(\dest)
movl %ebx, \do+40(\dest)
movl %ecx, \do+16(\dest)
movl %edx, \do+48(\dest)
/* Words 1<->5 and 9<->13. */
movl \so+20(\src), %eax
movl \so+4(\src), %ebx
movl \so+52(\src), %ecx
movl \so+36(\src), %edx
movl %eax, \do+4(\dest)
movl %ebx, \do+20(\dest)
movl %ecx, \do+36(\dest)
movl %edx, \do+52(\dest)
/* Words 0, 6, 8 and 14 are copied unchanged. */
movl \so+0(\src), %eax
movl \so+24(\src), %ebx
movl \so+32(\src), %ecx
movl \so+56(\src), %edx
movl %eax, \do+0(\dest)
movl %ebx, \do+24(\dest)
movl %ecx, \do+32(\dest)
movl %edx, \do+56(\dest)
.endm
/*
 * salsa8_core_gen_doubleround: one double-round of the general-purpose
 * register implementation used by salsa8_core_gen.
 *
 * Thirteen state words live in %rax, %rbx, %rcx, %rdx, %rsi, %rdi and
 * %r8-%r14; the remaining three are kept in the stack slots 48(%rsp),
 * 72(%rsp) and 88(%rsp) and are brought into %rbp or %r15 only while they
 * are needed. %rbp and %r15 otherwise serve as temporaries.
 *
 * Each step is "lea (a, b), t ; roll $n, t ; xorl t, c". Only the low
 * 32 bits of the lea result are used, and every xorl clears the upper half
 * of its destination register.
 */
.macro salsa8_core_gen_doubleround
/* First half of the double-round. */
movq 72(%rsp), %r15
leaq (%r14, %rdx), %rbp
roll $7, %ebp
xorl %ebp, %r9d
leaq (%rdi, %r15), %rbp
roll $7, %ebp
xorl %ebp, %r10d
leaq (%rdx, %r9), %rbp
roll $9, %ebp
xorl %ebp, %r11d
leaq (%r15, %r10), %rbp
roll $9, %ebp
xorl %ebp, %r13d
leaq (%r9, %r11), %rbp
roll $13, %ebp
xorl %ebp, %r14d
leaq (%r10, %r13), %rbp
roll $13, %ebp
xorl %ebp, %edi
leaq (%r11, %r14), %rbp
roll $18, %ebp
xorl %ebp, %edx
leaq (%r13, %rdi), %rbp
roll $18, %ebp
xorl %ebp, %r15d
/* Swap roles: %rbp now carries the 48(%rsp) word, %r15 is the temporary. */
movq 48(%rsp), %rbp
movq %r15, 72(%rsp)
leaq (%rax, %rbp), %r15
roll $7, %r15d
xorl %r15d, %ebx
leaq (%rbp, %rbx), %r15
roll $9, %r15d
xorl %r15d, %ecx
leaq (%rbx, %rcx), %r15
roll $13, %r15d
xorl %r15d, %eax
leaq (%rcx, %rax), %r15
roll $18, %r15d
xorl %r15d, %ebp
/* %r15 carries the 88(%rsp) word, %rbp is the temporary again. */
movq 88(%rsp), %r15
movq %rbp, 48(%rsp)
leaq (%r12, %r15), %rbp
roll $7, %ebp
xorl %ebp, %esi
leaq (%r15, %rsi), %rbp
roll $9, %ebp
xorl %ebp, %r8d
leaq (%rsi, %r8), %rbp
roll $13, %ebp
xorl %ebp, %r12d
leaq (%r8, %r12), %rbp
roll $18, %ebp
xorl %ebp, %r15d
movq %r15, 88(%rsp)
/* Second half of the double-round: same scheme, different word groups. */
movq 72(%rsp), %r15
leaq (%rsi, %rdx), %rbp
roll $7, %ebp
xorl %ebp, %edi
leaq (%r9, %r15), %rbp
roll $7, %ebp
xorl %ebp, %eax
leaq (%rdx, %rdi), %rbp
roll $9, %ebp
xorl %ebp, %ecx
leaq (%r15, %rax), %rbp
roll $9, %ebp
xorl %ebp, %r8d
leaq (%rdi, %rcx), %rbp
roll $13, %ebp
xorl %ebp, %esi
leaq (%rax, %r8), %rbp
roll $13, %ebp
xorl %ebp, %r9d
leaq (%rcx, %rsi), %rbp
roll $18, %ebp
xorl %ebp, %edx
leaq (%r8, %r9), %rbp
roll $18, %ebp
xorl %ebp, %r15d
movq 48(%rsp), %rbp
movq %r15, 72(%rsp)
leaq (%r10, %rbp), %r15
roll $7, %r15d
xorl %r15d, %r12d
leaq (%rbp, %r12), %r15
roll $9, %r15d
xorl %r15d, %r11d
leaq (%r12, %r11), %r15
roll $13, %r15d
xorl %r15d, %r10d
leaq (%r11, %r10), %r15
roll $18, %r15d
xorl %r15d, %ebp
movq 88(%rsp), %r15
movq %rbp, 48(%rsp)
leaq (%rbx, %r15), %rbp
roll $7, %ebp
xorl %ebp, %r14d
leaq (%r15, %r14), %rbp
roll $9, %ebp
xorl %ebp, %r13d
leaq (%r14, %r13), %rbp
roll $13, %ebp
xorl %ebp, %ebx
leaq (%r13, %rbx), %rbp
roll $18, %ebp
xorl %ebp, %r15d
movq %r15, 88(%rsp)
.endm
/*
 * salsa8_core_gen: four double-rounds using general-purpose registers.
 *
 * Reached with "call", so the return address sits at 0(%rsp).
 * Input:  64 bytes at 8(%rsp)..71(%rsp), i.e. the caller's 0..63(%rsp).
 * Output: the 64-byte result is written to 24(%rsp)..87(%rsp) and also
 *         returned in %xmm0-%xmm3.
 * 72(%rsp) and 88(%rsp) are used as spill slots during the rounds.
 *
 * Clobbers %rax, %rbx, %rcx, %rdx, %rsi, %rdi, %rbp and %r8-%r15 without
 * saving them; the caller must preserve whatever it still needs.
 * The input is not added back here; the visible caller does that with
 * paddd on the returned %xmm0-%xmm3.
 */
.text
.p2align 6
salsa8_core_gen:
/*
 * Unpack: each 64-bit load holds two state words. The full value is copied
 * to one register (only its low 32 bits are significant) and the original
 * is shifted right by 32 to isolate the high word.
 */
/* 0: %rdx, %rdi, %rcx, %rsi */
movq 8(%rsp), %rdi
movq %rdi, %rdx
shrq $32, %rdi
movq 16(%rsp), %rsi
movq %rsi, %rcx
shrq $32, %rsi
/* 1: %r9, 72(%rsp), %rax, %r8 */
movq 24(%rsp), %r8
movq %r8, %r9
shrq $32, %r8
movq %r8, 72(%rsp)
movq 32(%rsp), %r8
movq %r8, %rax
shrq $32, %r8
/* 2: %r11, %r10, 48(%rsp), %r12 */
movq 40(%rsp), %r10
movq %r10, %r11
shrq $32, %r10
movq 48(%rsp), %r12
/* The low word of this pair stays in memory at 48(%rsp), so the two moves
 * below are not needed and are left commented out. */
/* movq %r12, %r13 */
/* movq %r13, 48(%rsp) */
shrq $32, %r12
/* 3: %r14, %r13, %rbx, 88(%rsp) */
movq 56(%rsp), %r13
movq %r13, %r14
shrq $32, %r13
movq 64(%rsp), %r15
movq %r15, %rbx
shrq $32, %r15
movq %r15, 88(%rsp)
salsa8_core_gen_doubleround
salsa8_core_gen_doubleround
salsa8_core_gen_doubleround
salsa8_core_gen_doubleround
/*
 * Repack word pairs into 64-bit values and store them at 24..87(%rsp).
 * The in-memory words at 72(%rsp) and 48(%rsp) are read before those slots
 * are overwritten with packed output; %r15 still holds the 88(%rsp) word
 * from the last double-round.
 */
shlq $32, %rdi
xorq %rdi, %rdx
movq %rdx, 24(%rsp)
shlq $32, %rsi
xorq %rsi, %rcx
movq %rcx, 32(%rsp)
movl 72(%rsp), %edi
shlq $32, %rdi
xorq %rdi, %r9
movq %r9, 40(%rsp)
movl 48(%rsp), %ebp
shlq $32, %r8
xorq %r8, %rax
movq %rax, 48(%rsp)
shlq $32, %r10
xorq %r10, %r11
movq %r11, 56(%rsp)
shlq $32, %r12
xorq %r12, %rbp
movq %rbp, 64(%rsp)
shlq $32, %r13
xorq %r13, %r14
movq %r14, 72(%rsp)
/* The first 16 output bytes are complete; load them while the last pair is
 * still being packed. */
movdqa 24(%rsp), %xmm0
shlq $32, %r15
xorq %r15, %rbx
movq %rbx, 80(%rsp)
movdqa 40(%rsp), %xmm1
movdqa 56(%rsp), %xmm2
movdqa 72(%rsp), %xmm3
ret
/*
 * scrypt_core: single-lane scrypt core, exported with and without a leading
 * underscore.
 *
 * After the prologue the arguments are normalised to
 *   %rdi = first argument, %rsi = second argument, %r8 = third argument.
 * As used by the two code paths that follow:
 *   %rdi points to a 128-byte state block that is read and written back,
 *   %rsi points to a scratch area into which %r8 consecutive 128-byte
 *        entries are written and later read back,
 *   %r8  is the entry / iteration count.
 * Both pointers are accessed with movdqa and therefore must be 16-byte
 * aligned.
 */
.text
.p2align 6
.globl scrypt_core
.globl _scrypt_core
scrypt_core:
_scrypt_core:
/* Save the general-purpose registers that the code below overwrites. */
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
#if defined(_WIN64) || defined(__CYGWIN__)
/*
 * Windows-style targets: additionally preserve %xmm6-%xmm15, %rdi and %rsi,
 * then move the first two arguments from %rcx/%rdx into %rdi/%rsi.  %r8 is
 * left untouched on this path (presumably it already carries the third
 * argument under that calling convention).
 */
subq $176, %rsp
movdqa %xmm6, 8(%rsp)
movdqa %xmm7, 24(%rsp)
movdqa %xmm8, 40(%rsp)
movdqa %xmm9, 56(%rsp)
movdqa %xmm10, 72(%rsp)
movdqa %xmm11, 88(%rsp)
movdqa %xmm12, 104(%rsp)
movdqa %xmm13, 120(%rsp)
movdqa %xmm14, 136(%rsp)
movdqa %xmm15, 152(%rsp)
pushq %rdi
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
#else
/* Other targets: the third argument arrives in %rdx; move it to %r8. */
movq %rdx, %r8
#endif
/*
 * scrypt_core_cleanup: epilogue shared by both code paths.  It undoes the
 * prologue above in reverse order; the caller of the macro issues `ret`.
 */
.macro scrypt_core_cleanup
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rsi
popq %rdi
movdqa 8(%rsp), %xmm6
movdqa 24(%rsp), %xmm7
movdqa 40(%rsp), %xmm8
movdqa 56(%rsp), %xmm9
movdqa 72(%rsp), %xmm10
movdqa 88(%rsp), %xmm11
movdqa 104(%rsp), %xmm12
movdqa 120(%rsp), %xmm13
movdqa 136(%rsp), %xmm14
movdqa 152(%rsp), %xmm15
addq $176, %rsp
#endif
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.endm
/*
 * CPUID leaf 0 returns the vendor string in %ebx, %edx, %ecx.  The three
 * constants are the little-endian encodings of "ntel", "ineI" and "Genu".
 * Only an exact "GenuineIntel" match selects the SSE2 path; any mismatch,
 * including on the last compare, ends up in scrypt_core_gen, which is
 * placed directly after this code.
 * cpuid overwrites %eax, %ebx, %ecx and %edx, which is why the arguments
 * were moved out of %rcx/%rdx beforehand.
 */
/* GenuineIntel processors have fast SIMD */
xorl %eax, %eax
cpuid
cmpl $0x6c65746e, %ecx
jne scrypt_core_gen
cmpl $0x49656e69, %edx
jne scrypt_core_gen
cmpl $0x756e6547, %ebx
je scrypt_core_xmm
/*
 * scrypt_core_gen: scrypt core path that performs the Salsa rounds in
 * general-purpose registers (salsa8_core_gen); SSE registers are only used
 * to hold, XOR and add the 128-byte state.
 *
 * Register roles: %xmm8-%xmm11 = first 64 bytes of the state,
 *                 %xmm12-%xmm15 = second 64 bytes.
 *
 * Local frame (136 bytes):
 *     0.. 63(%rsp)  input block handed to salsa8_core_gen
 *    16.. 95(%rsp)  overwritten by salsa8_core_gen (output and spill slots)
 *    96(%rsp)       count from %r8; replaced by count-1 before loop2
 *   104(%rsp)       state pointer (%rdi)
 *   112(%rsp)       scratch base (%rsi)
 *   120(%rsp)       scratch end = base + count * 128
 *   128(%rsp)       loop1: current scratch pointer; loop2: remaining count
 * salsa8_core_gen overwrites every general-purpose register used here, so
 * all loop state is kept in this frame across the calls.
 */
.p2align 6
scrypt_core_gen:
subq $136, %rsp
movdqa 0(%rdi), %xmm8
movdqa 16(%rdi), %xmm9
movdqa 32(%rdi), %xmm10
movdqa 48(%rdi), %xmm11
movdqa 64(%rdi), %xmm12
movdqa 80(%rdi), %xmm13
movdqa 96(%rdi), %xmm14
movdqa 112(%rdi), %xmm15
/* %rcx = scratch base + count * 128 (one 128-byte entry per iteration). */
movq %r8, %rcx
shlq $7, %rcx
addq %rsi, %rcx
movq %r8, 96(%rsp)
movq %rdi, 104(%rsp)
movq %rsi, 112(%rsp)
movq %rcx, 120(%rsp)
/*
 * Loop 1: store the current state as the next scratch entry, then mix it.
 * The end test comes after the body, so the count is expected to be
 * non-zero.
 */
scrypt_core_gen_loop1:
movdqa %xmm8, 0(%rsi)
movdqa %xmm9, 16(%rsi)
movdqa %xmm10, 32(%rsi)
movdqa %xmm11, 48(%rsi)
movdqa %xmm12, 64(%rsi)
movdqa %xmm13, 80(%rsi)
movdqa %xmm14, 96(%rsi)
movdqa %xmm15, 112(%rsi)
/* first half ^= second half; first half += salsa8(first half) */
pxor %xmm12, %xmm8
pxor %xmm13, %xmm9
pxor %xmm14, %xmm10
pxor %xmm15, %xmm11
movdqa %xmm8, 0(%rsp)
movdqa %xmm9, 16(%rsp)
movdqa %xmm10, 32(%rsp)
movdqa %xmm11, 48(%rsp)
movq %rsi, 128(%rsp)
call salsa8_core_gen
paddd %xmm0, %xmm8
paddd %xmm1, %xmm9
paddd %xmm2, %xmm10
paddd %xmm3, %xmm11
/* second half ^= first half; second half += salsa8(second half) */
pxor %xmm8, %xmm12
pxor %xmm9, %xmm13
pxor %xmm10, %xmm14
pxor %xmm11, %xmm15
movdqa %xmm12, 0(%rsp)
movdqa %xmm13, 16(%rsp)
movdqa %xmm14, 32(%rsp)
movdqa %xmm15, 48(%rsp)
call salsa8_core_gen
movq 128(%rsp), %rsi
paddd %xmm0, %xmm12
paddd %xmm1, %xmm13
paddd %xmm2, %xmm14
paddd %xmm3, %xmm15
addq $128, %rsi
movq 120(%rsp), %rcx
cmpq %rcx, %rsi
jne scrypt_core_gen_loop1
/*
 * Set up loop 2: %rcx = iteration count, %r8 = count - 1 (used as an index
 * mask, so the count is presumably a power of two), %edx = dword 0 of the
 * second state half, which selects the first scratch entry.
 */
movq 96(%rsp), %r8
movq %r8, %rcx
subl $1, %r8d
movq %r8, 96(%rsp)
movd %xmm12, %edx
/* Loop 2: XOR in the scratch entry selected by the state, then mix. */
scrypt_core_gen_loop2:
movq 112(%rsp), %rsi
/* %rdx = scratch base + (index & mask) * 128 */
andl %r8d, %edx
shll $7, %edx
addq %rsi, %rdx
movdqa 0(%rdx), %xmm0
movdqa 16(%rdx), %xmm1
movdqa 32(%rdx), %xmm2
movdqa 48(%rdx), %xmm3
movdqa 64(%rdx), %xmm4
movdqa 80(%rdx), %xmm5
movdqa 96(%rdx), %xmm6
movdqa 112(%rdx), %xmm7
pxor %xmm0, %xmm8
pxor %xmm1, %xmm9
pxor %xmm2, %xmm10
pxor %xmm3, %xmm11
pxor %xmm4, %xmm12
pxor %xmm5, %xmm13
pxor %xmm6, %xmm14
pxor %xmm7, %xmm15
/* first half ^= second half; first half += salsa8(first half) */
pxor %xmm12, %xmm8
pxor %xmm13, %xmm9
pxor %xmm14, %xmm10
pxor %xmm15, %xmm11
movdqa %xmm8, 0(%rsp)
movdqa %xmm9, 16(%rsp)
movdqa %xmm10, 32(%rsp)
movdqa %xmm11, 48(%rsp)
movq %rcx, 128(%rsp)
call salsa8_core_gen
paddd %xmm0, %xmm8
paddd %xmm1, %xmm9
paddd %xmm2, %xmm10
paddd %xmm3, %xmm11
/* second half ^= first half; second half += salsa8(second half) */
pxor %xmm8, %xmm12
pxor %xmm9, %xmm13
pxor %xmm10, %xmm14
pxor %xmm11, %xmm15
movdqa %xmm12, 0(%rsp)
movdqa %xmm13, 16(%rsp)
movdqa %xmm14, 32(%rsp)
movdqa %xmm15, 48(%rsp)
call salsa8_core_gen
movq 96(%rsp), %r8
movq 128(%rsp), %rcx
/*
 * Next index without a movd: salsa8_core_gen returns with output dword 0
 * still in %edx, and 0(%rsp) holds dword 0 of the block that was passed in,
 * so their sum equals dword 0 of %xmm12 after the paddd below.
 */
addl 0(%rsp), %edx
paddd %xmm0, %xmm12
paddd %xmm1, %xmm13
paddd %xmm2, %xmm14
paddd %xmm3, %xmm15
/* The SSE instructions above leave the flags alone; ja tests this subq. */
subq $1, %rcx
ja scrypt_core_gen_loop2
/* Write the final state back, release the frame and return. */
movq 104(%rsp), %rdi
movdqa %xmm8, 0(%rdi)
movdqa %xmm9, 16(%rdi)
movdqa %xmm10, 32(%rdi)
movdqa %xmm11, 48(%rdi)
movdqa %xmm12, 64(%rdi)
movdqa %xmm13, 80(%rdi)
movdqa %xmm14, 96(%rdi)
movdqa %xmm15, 112(%rdi)
addq $136, %rsp
scrypt_core_cleanup
ret
/*
 * salsa8_core_xmm_doubleround: one Salsa double-round on a single state
 * held in %xmm0-%xmm3; %xmm4 and %xmm5 are temporaries.
 *
 * Each step adds two state registers, rotates the sum left by 7, 9, 13 or
 * 18 bits and XORs it into a third register.  SSE2 has no packed rotate, so
 * the rotation is built from a left shift by k (in %xmm4) and a right shift
 * by 32-k (in %xmm5) that are XORed into the destination separately.
 *
 * The pshufd instructions rotate the four dwords within a register:
 * $0x93 moves every dword up one lane, $0x4e swaps the two 64-bit halves
 * and $0x39 moves every dword down one lane.  They are interleaved with the
 * arithmetic; the instruction order is deliberate.
 */
.macro salsa8_core_xmm_doubleround
/* First half of the double-round. */
movdqa %xmm1, %xmm4
paddd %xmm0, %xmm4
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm0, %xmm4
pxor %xmm5, %xmm3
paddd %xmm3, %xmm4
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm3, %xmm4
pxor %xmm5, %xmm2
pshufd $0x93, %xmm3, %xmm3
paddd %xmm2, %xmm4
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm2, %xmm4
pxor %xmm5, %xmm1
pshufd $0x4e, %xmm2, %xmm2
paddd %xmm1, %xmm4
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
movdqa %xmm3, %xmm4
pxor %xmm5, %xmm0
pshufd $0x39, %xmm1, %xmm1
/* Second half: same rotation amounts with %xmm1 and %xmm3 exchanged. */
paddd %xmm0, %xmm4
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm0, %xmm4
pxor %xmm5, %xmm1
paddd %xmm1, %xmm4
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm1, %xmm4
pxor %xmm5, %xmm2
pshufd $0x93, %xmm1, %xmm1
paddd %xmm2, %xmm4
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm2, %xmm4
pxor %xmm5, %xmm3
pshufd $0x4e, %xmm2, %xmm2
paddd %xmm3, %xmm4
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
pshufd $0x39, %xmm3, %xmm3
pxor %xmm5, %xmm0
.endm
/* salsa8_core_xmm: four double-rounds on the state in %xmm0-%xmm3. */
.macro salsa8_core_xmm
salsa8_core_xmm_doubleround
salsa8_core_xmm_doubleround
salsa8_core_xmm_doubleround
salsa8_core_xmm_doubleround
.endm
/*
 * scrypt_core_xmm: SSE2 path of scrypt_core.  The whole 128-byte state stays
 * in %xmm8-%xmm15 (first half in %xmm8-%xmm11, second half in
 * %xmm12-%xmm15); %xmm0-%xmm5 are working registers for salsa8_core_xmm.
 * Expects %rdi = state pointer, %rsi = scratch base, %r8 = count.
 */
.p2align 6
scrypt_core_xmm:
/*
 * Build a mask in %xmm1 with the low dword of every quadword set.  The
 * pxor/pand/pxor triples below compute x = ((x ^ y) & mask) ^ y, i.e. keep
 * the even dwords of x and take the odd dwords from y.  Together with the
 * pshufd/punpck steps this permutes the sixteen dwords of each 64-byte half
 * into the register layout used by the SIMD double-round (presumably the
 * diagonal form).  Note that the loads deliberately pair 16(%rdi) with
 * %xmm11 and 48(%rdi) with %xmm9.
 */
pcmpeqw %xmm1, %xmm1
psrlq $32, %xmm1
/* Permute the first 64 bytes. */
movdqa 0(%rdi), %xmm8
movdqa 16(%rdi), %xmm11
movdqa 32(%rdi), %xmm10
movdqa 48(%rdi), %xmm9
movdqa %xmm8, %xmm0
pxor %xmm11, %xmm8
pand %xmm1, %xmm8
pxor %xmm11, %xmm8
pxor %xmm10, %xmm11
pand %xmm1, %xmm11
pxor %xmm10, %xmm11
pxor %xmm9, %xmm10
pand %xmm1, %xmm10
pxor %xmm9, %xmm10
pxor %xmm0, %xmm9
pand %xmm1, %xmm9
pxor %xmm0, %xmm9
movdqa %xmm8, %xmm0
pshufd $0x4e, %xmm10, %xmm10
punpcklqdq %xmm10, %xmm8
punpckhqdq %xmm0, %xmm10
movdqa %xmm11, %xmm0
pshufd $0x4e, %xmm9, %xmm9
punpcklqdq %xmm9, %xmm11
punpckhqdq %xmm0, %xmm9
/* Permute the second 64 bytes the same way. */
movdqa 64(%rdi), %xmm12
movdqa 80(%rdi), %xmm15
movdqa 96(%rdi), %xmm14
movdqa 112(%rdi), %xmm13
movdqa %xmm12, %xmm0
pxor %xmm15, %xmm12
pand %xmm1, %xmm12
pxor %xmm15, %xmm12
pxor %xmm14, %xmm15
pand %xmm1, %xmm15
pxor %xmm14, %xmm15
pxor %xmm13, %xmm14
pand %xmm1, %xmm14
pxor %xmm13, %xmm14
pxor %xmm0, %xmm13
pand %xmm1, %xmm13
pxor %xmm0, %xmm13
movdqa %xmm12, %xmm0
pshufd $0x4e, %xmm14, %xmm14
punpcklqdq %xmm14, %xmm12
punpckhqdq %xmm0, %xmm14
movdqa %xmm15, %xmm0
pshufd $0x4e, %xmm13, %xmm13
punpcklqdq %xmm13, %xmm15
punpckhqdq %xmm0, %xmm13
/* %rdx = write cursor, %rcx = scratch base + count * 128 (end). */
movq %rsi, %rdx
movq %r8, %rcx
shlq $7, %rcx
addq %rsi, %rcx
/*
 * Loop 1: fill the scratch area.  Unlike scrypt_core_gen, the first half is
 * XORed with the second half BEFORE it is stored, so each entry holds
 * (first ^ second, second).  Loop 2 below is written to match.  The end
 * test follows the body, so the count is expected to be non-zero.
 */
scrypt_core_xmm_loop1:
pxor %xmm12, %xmm8
pxor %xmm13, %xmm9
pxor %xmm14, %xmm10
pxor %xmm15, %xmm11
movdqa %xmm8, 0(%rdx)
movdqa %xmm9, 16(%rdx)
movdqa %xmm10, 32(%rdx)
movdqa %xmm11, 48(%rdx)
movdqa %xmm12, 64(%rdx)
movdqa %xmm13, 80(%rdx)
movdqa %xmm14, 96(%rdx)
movdqa %xmm15, 112(%rdx)
/* first half += salsa8(first half) */
movdqa %xmm8, %xmm0
movdqa %xmm9, %xmm1
movdqa %xmm10, %xmm2
movdqa %xmm11, %xmm3
salsa8_core_xmm
paddd %xmm0, %xmm8
paddd %xmm1, %xmm9
paddd %xmm2, %xmm10
paddd %xmm3, %xmm11
/* second half ^= first half; second half += salsa8(second half) */
pxor %xmm8, %xmm12
pxor %xmm9, %xmm13
pxor %xmm10, %xmm14
pxor %xmm11, %xmm15
movdqa %xmm12, %xmm0
movdqa %xmm13, %xmm1
movdqa %xmm14, %xmm2
movdqa %xmm15, %xmm3
salsa8_core_xmm
paddd %xmm0, %xmm12
paddd %xmm1, %xmm13
paddd %xmm2, %xmm14
paddd %xmm3, %xmm15
addq $128, %rdx
cmpq %rcx, %rdx
jne scrypt_core_xmm_loop1
/*
 * %rcx = iteration count, %r8 = count - 1 used as an index mask (so the
 * count is presumably a power of two).
 */
movq %r8, %rcx
subl $1, %r8d
/* Loop 2: XOR in the scratch entry selected by the state, then mix. */
scrypt_core_xmm_loop2:
/* %rdx = (dword 0 of %xmm12 & mask) * 128 = byte offset of the entry. */
movd %xmm12, %edx
andl %r8d, %edx
shll $7, %edx
/*
 * The stored first half already contains the stored second half, so XORing
 * it and the live second half into the live first half yields
 * (first ^ stored first) ^ (second ^ stored second) in one go.
 */
pxor 0(%rsi, %rdx), %xmm8
pxor 16(%rsi, %rdx), %xmm9
pxor 32(%rsi, %rdx), %xmm10
pxor 48(%rsi, %rdx), %xmm11
pxor %xmm12, %xmm8
pxor %xmm13, %xmm9
pxor %xmm14, %xmm10
pxor %xmm15, %xmm11
movdqa %xmm8, %xmm0
movdqa %xmm9, %xmm1
movdqa %xmm10, %xmm2
movdqa %xmm11, %xmm3
salsa8_core_xmm
paddd %xmm0, %xmm8
paddd %xmm1, %xmm9
paddd %xmm2, %xmm10
paddd %xmm3, %xmm11
/* second half ^= stored second half ^ new first half, then mix it. */
pxor 64(%rsi, %rdx), %xmm12
pxor 80(%rsi, %rdx), %xmm13
pxor 96(%rsi, %rdx), %xmm14
pxor 112(%rsi, %rdx), %xmm15
pxor %xmm8, %xmm12
pxor %xmm9, %xmm13
pxor %xmm10, %xmm14
pxor %xmm11, %xmm15
movdqa %xmm12, %xmm0
movdqa %xmm13, %xmm1
movdqa %xmm14, %xmm2
movdqa %xmm15, %xmm3
salsa8_core_xmm
paddd %xmm0, %xmm12
paddd %xmm1, %xmm13
paddd %xmm2, %xmm14
paddd %xmm3, %xmm15
subq $1, %rcx
ja scrypt_core_xmm_loop2
/*
 * Permute both halves back with the same mask technique and store the
 * result to the state block.  The register-to-offset pairing (%xmm11 ->
 * 16, %xmm9 -> 48, and likewise for the second half) mirrors the loads at
 * the top of this routine.
 */
pcmpeqw %xmm1, %xmm1
psrlq $32, %xmm1
movdqa %xmm8, %xmm0
pxor %xmm9, %xmm8
pand %xmm1, %xmm8
pxor %xmm9, %xmm8
pxor %xmm10, %xmm9
pand %xmm1, %xmm9
pxor %xmm10, %xmm9
pxor %xmm11, %xmm10
pand %xmm1, %xmm10
pxor %xmm11, %xmm10
pxor %xmm0, %xmm11
pand %xmm1, %xmm11
pxor %xmm0, %xmm11
movdqa %xmm8, %xmm0
pshufd $0x4e, %xmm10, %xmm10
punpcklqdq %xmm10, %xmm8
punpckhqdq %xmm0, %xmm10
movdqa %xmm9, %xmm0
pshufd $0x4e, %xmm11, %xmm11
punpcklqdq %xmm11, %xmm9
punpckhqdq %xmm0, %xmm11
movdqa %xmm8, 0(%rdi)
movdqa %xmm11, 16(%rdi)
movdqa %xmm10, 32(%rdi)
movdqa %xmm9, 48(%rdi)
movdqa %xmm12, %xmm0
pxor %xmm13, %xmm12
pand %xmm1, %xmm12
pxor %xmm13, %xmm12
pxor %xmm14, %xmm13
pand %xmm1, %xmm13
pxor %xmm14, %xmm13
pxor %xmm15, %xmm14
pand %xmm1, %xmm14
pxor %xmm15, %xmm14
pxor %xmm0, %xmm15
pand %xmm1, %xmm15
pxor %xmm0, %xmm15
movdqa %xmm12, %xmm0
pshufd $0x4e, %xmm14, %xmm14
punpcklqdq %xmm14, %xmm12
punpckhqdq %xmm0, %xmm14
movdqa %xmm13, %xmm0
pshufd $0x4e, %xmm15, %xmm15
punpcklqdq %xmm15, %xmm13
punpckhqdq %xmm0, %xmm15
movdqa %xmm12, 64(%rdi)
movdqa %xmm15, 80(%rdi)
movdqa %xmm14, 96(%rdi)
movdqa %xmm13, 112(%rdi)
scrypt_core_cleanup
ret
#if defined(USE_AVX)
/*
 * salsa8_core_3way_avx_doubleround: one Salsa double-round applied to three
 * independent states at once, using three-operand AVX encodings.
 *
 *   state A: %xmm0-%xmm3     sum temporary: %xmm4
 *   state B: %xmm8-%xmm11    sum temporary: %xmm6
 *   state C: %xmm12-%xmm15   sum temporary: %xmm7
 *   %xmm5 is the shared temporary for the left-shifted half of a rotation.
 *
 * A rotation by k is formed from vpslld $k and vpsrld $(32-k), both XORed
 * into the destination.  The rotation amounts are 7, 9, 13, 18, twice.
 * vpshufd $0x93 / $0x4e / $0x39 rotate the dwords inside a register between
 * steps.  The three states are interleaved instruction by instruction; the
 * order is deliberate.
 */
.macro salsa8_core_3way_avx_doubleround
/* First half, rotate by 7. */
vpaddd %xmm0, %xmm1, %xmm4
vpaddd %xmm8, %xmm9, %xmm6
vpaddd %xmm12, %xmm13, %xmm7
vpslld $7, %xmm4, %xmm5
vpsrld $25, %xmm4, %xmm4
vpxor %xmm5, %xmm3, %xmm3
vpxor %xmm4, %xmm3, %xmm3
vpslld $7, %xmm6, %xmm5
vpsrld $25, %xmm6, %xmm6
vpxor %xmm5, %xmm11, %xmm11
vpxor %xmm6, %xmm11, %xmm11
vpslld $7, %xmm7, %xmm5
vpsrld $25, %xmm7, %xmm7
vpxor %xmm5, %xmm15, %xmm15
vpxor %xmm7, %xmm15, %xmm15
/* First half, rotate by 9. */
vpaddd %xmm3, %xmm0, %xmm4
vpaddd %xmm11, %xmm8, %xmm6
vpaddd %xmm15, %xmm12, %xmm7
vpslld $9, %xmm4, %xmm5
vpsrld $23, %xmm4, %xmm4
vpxor %xmm5, %xmm2, %xmm2
vpxor %xmm4, %xmm2, %xmm2
vpslld $9, %xmm6, %xmm5
vpsrld $23, %xmm6, %xmm6
vpxor %xmm5, %xmm10, %xmm10
vpxor %xmm6, %xmm10, %xmm10
vpslld $9, %xmm7, %xmm5
vpsrld $23, %xmm7, %xmm7
vpxor %xmm5, %xmm14, %xmm14
vpxor %xmm7, %xmm14, %xmm14
/* First half, rotate by 13. */
vpaddd %xmm2, %xmm3, %xmm4
vpaddd %xmm10, %xmm11, %xmm6
vpaddd %xmm14, %xmm15, %xmm7
vpslld $13, %xmm4, %xmm5
vpsrld $19, %xmm4, %xmm4
vpshufd $0x93, %xmm3, %xmm3
vpshufd $0x93, %xmm11, %xmm11
vpshufd $0x93, %xmm15, %xmm15
vpxor %xmm5, %xmm1, %xmm1
vpxor %xmm4, %xmm1, %xmm1
vpslld $13, %xmm6, %xmm5
vpsrld $19, %xmm6, %xmm6
vpxor %xmm5, %xmm9, %xmm9
vpxor %xmm6, %xmm9, %xmm9
vpslld $13, %xmm7, %xmm5
vpsrld $19, %xmm7, %xmm7
vpxor %xmm5, %xmm13, %xmm13
vpxor %xmm7, %xmm13, %xmm13
/* First half, rotate by 18. */
vpaddd %xmm1, %xmm2, %xmm4
vpaddd %xmm9, %xmm10, %xmm6
vpaddd %xmm13, %xmm14, %xmm7
vpslld $18, %xmm4, %xmm5
vpsrld $14, %xmm4, %xmm4
vpshufd $0x4e, %xmm2, %xmm2
vpshufd $0x4e, %xmm10, %xmm10
vpshufd $0x4e, %xmm14, %xmm14
vpxor %xmm5, %xmm0, %xmm0
vpxor %xmm4, %xmm0, %xmm0
vpslld $18, %xmm6, %xmm5
vpsrld $14, %xmm6, %xmm6
vpxor %xmm5, %xmm8, %xmm8
vpxor %xmm6, %xmm8, %xmm8
vpslld $18, %xmm7, %xmm5
vpsrld $14, %xmm7, %xmm7
vpxor %xmm5, %xmm12, %xmm12
vpxor %xmm7, %xmm12, %xmm12
/* Second half, rotate by 7 (roles of registers 1 and 3 exchanged). */
vpaddd %xmm0, %xmm3, %xmm4
vpaddd %xmm8, %xmm11, %xmm6
vpaddd %xmm12, %xmm15, %xmm7
vpslld $7, %xmm4, %xmm5
vpsrld $25, %xmm4, %xmm4
vpshufd $0x39, %xmm1, %xmm1
vpxor %xmm5, %xmm1, %xmm1
vpxor %xmm4, %xmm1, %xmm1
vpslld $7, %xmm6, %xmm5
vpsrld $25, %xmm6, %xmm6
vpshufd $0x39, %xmm9, %xmm9
vpxor %xmm5, %xmm9, %xmm9
vpxor %xmm6, %xmm9, %xmm9
vpslld $7, %xmm7, %xmm5
vpsrld $25, %xmm7, %xmm7
vpshufd $0x39, %xmm13, %xmm13
vpxor %xmm5, %xmm13, %xmm13
vpxor %xmm7, %xmm13, %xmm13
/* Second half, rotate by 9. */
vpaddd %xmm1, %xmm0, %xmm4
vpaddd %xmm9, %xmm8, %xmm6
vpaddd %xmm13, %xmm12, %xmm7
vpslld $9, %xmm4, %xmm5
vpsrld $23, %xmm4, %xmm4
vpxor %xmm5, %xmm2, %xmm2
vpxor %xmm4, %xmm2, %xmm2
vpslld $9, %xmm6, %xmm5
vpsrld $23, %xmm6, %xmm6
vpxor %xmm5, %xmm10, %xmm10
vpxor %xmm6, %xmm10, %xmm10
vpslld $9, %xmm7, %xmm5
vpsrld $23, %xmm7, %xmm7
vpxor %xmm5, %xmm14, %xmm14
vpxor %xmm7, %xmm14, %xmm14
/* Second half, rotate by 13. */
vpaddd %xmm2, %xmm1, %xmm4
vpaddd %xmm10, %xmm9, %xmm6
vpaddd %xmm14, %xmm13, %xmm7
vpslld $13, %xmm4, %xmm5
vpsrld $19, %xmm4, %xmm4
vpshufd $0x93, %xmm1, %xmm1
vpshufd $0x93, %xmm9, %xmm9
vpshufd $0x93, %xmm13, %xmm13
vpxor %xmm5, %xmm3, %xmm3
vpxor %xmm4, %xmm3, %xmm3
vpslld $13, %xmm6, %xmm5
vpsrld $19, %xmm6, %xmm6
vpxor %xmm5, %xmm11, %xmm11
vpxor %xmm6, %xmm11, %xmm11
vpslld $13, %xmm7, %xmm5
vpsrld $19, %xmm7, %xmm7
vpxor %xmm5, %xmm15, %xmm15
vpxor %xmm7, %xmm15, %xmm15
/* Second half, rotate by 18, plus the final lane rotations. */
vpaddd %xmm3, %xmm2, %xmm4
vpaddd %xmm11, %xmm10, %xmm6
vpaddd %xmm15, %xmm14, %xmm7
vpslld $18, %xmm4, %xmm5
vpsrld $14, %xmm4, %xmm4
vpshufd $0x4e, %xmm2, %xmm2
vpshufd $0x4e, %xmm10, %xmm10
vpxor %xmm5, %xmm0, %xmm0
vpxor %xmm4, %xmm0, %xmm0
vpslld $18, %xmm6, %xmm5
vpsrld $14, %xmm6, %xmm6
vpshufd $0x4e, %xmm14, %xmm14
vpshufd $0x39, %xmm11, %xmm11
vpxor %xmm5, %xmm8, %xmm8
vpxor %xmm6, %xmm8, %xmm8
vpslld $18, %xmm7, %xmm5
vpsrld $14, %xmm7, %xmm7
vpshufd $0x39, %xmm3, %xmm3
vpshufd $0x39, %xmm15, %xmm15
vpxor %xmm5, %xmm12, %xmm12
vpxor %xmm7, %xmm12, %xmm12
.endm
/* salsa8_core_3way_avx: four double-rounds on all three states. */
.macro salsa8_core_3way_avx
salsa8_core_3way_avx_doubleround
salsa8_core_3way_avx_doubleround
salsa8_core_3way_avx_doubleround
salsa8_core_3way_avx_doubleround
.endm
#endif /* USE_AVX */
/*
 * scrypt_core_3way: scrypt core that processes three independent lanes at
 * once, exported with and without a leading underscore.
 *
 * After the prologue the arguments are normalised to
 *   %rdi = first argument, %rsi = second argument, %r8 = third argument.
 * As used by the AVX and XOP paths in this file:
 *   %rdi points to three consecutive 128-byte state blocks (384 bytes),
 *   %rsi points to a scratch area of count * 3 * 128 bytes,
 *   %r8  is the entry / iteration count.
 * A 392-byte local frame at %rsp holds working copies of the three states.
 */
.text
.p2align 6
.globl scrypt_core_3way
.globl _scrypt_core_3way
scrypt_core_3way:
_scrypt_core_3way:
/* Only %rbx and %rbp are saved; the 3-way paths do not touch %r12-%r15. */
pushq %rbx
pushq %rbp
#if defined(_WIN64) || defined(__CYGWIN__)
/*
 * Windows-style targets: also preserve %xmm6-%xmm15, %rdi and %rsi, then
 * move the first two arguments from %rcx/%rdx into %rdi/%rsi.  %r8 is left
 * untouched on this path (presumably it already carries the third
 * argument under that calling convention).
 */
subq $176, %rsp
movdqa %xmm6, 8(%rsp)
movdqa %xmm7, 24(%rsp)
movdqa %xmm8, 40(%rsp)
movdqa %xmm9, 56(%rsp)
movdqa %xmm10, 72(%rsp)
movdqa %xmm11, 88(%rsp)
movdqa %xmm12, 104(%rsp)
movdqa %xmm13, 120(%rsp)
movdqa %xmm14, 136(%rsp)
movdqa %xmm15, 152(%rsp)
pushq %rdi
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
#else
/* Other targets: the third argument arrives in %rdx; move it to %r8. */
movq %rdx, %r8
#endif
subq $392, %rsp
/*
 * scrypt_core_3way_cleanup: epilogue shared by all 3-way code paths.  It
 * releases the local frame and undoes the prologue in reverse order; the
 * caller of the macro issues `ret`.
 */
.macro scrypt_core_3way_cleanup
addq $392, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rsi
popq %rdi
movdqa 8(%rsp), %xmm6
movdqa 24(%rsp), %xmm7
movdqa 40(%rsp), %xmm8
movdqa 56(%rsp), %xmm9
movdqa 72(%rsp), %xmm10
movdqa 88(%rsp), %xmm11
movdqa 104(%rsp), %xmm12
movdqa 120(%rsp), %xmm13
movdqa 136(%rsp), %xmm14
movdqa 152(%rsp), %xmm15
addq $176, %rsp
#endif
popq %rbp
popq %rbx
.endm
/*
 * Runtime dispatch.  Without USE_AVX only the SSE2 path exists.  Otherwise
 * fall back to it unless the CPU and the OS both support AVX, and prefer
 * the XOP path when it is compiled in and available.  cpuid and xgetbv
 * overwrite %eax, %ebx, %ecx and %edx, none of which hold live values here.
 */
#if !defined(USE_AVX)
jmp scrypt_core_3way_xmm
#else
/* Check for AVX and OSXSAVE support */
/* CPUID leaf 1: 0x18000000 selects bits 27 and 28 of %ecx; both must be set. */
movl $1, %eax
cpuid
andl $0x18000000, %ecx
cmpl $0x18000000, %ecx
jne scrypt_core_3way_xmm
/* Check for XMM and YMM state support */
/* xgetbv with %ecx = 0: bits 1 and 2 of the result in %eax must both be set. */
xorl %ecx, %ecx
xgetbv
andl $0x00000006, %eax
cmpl $0x00000006, %eax
jne scrypt_core_3way_xmm
#if defined(USE_XOP)
/* Check for XOP support */
/* CPUID leaf 0x80000001: bit 11 of %ecx. */
movl $0x80000001, %eax
cpuid
andl $0x00000800, %ecx
jnz scrypt_core_3way_xop
#endif
/*
 * scrypt_core_3way_avx: 3-way scrypt core using the AVX Salsa macro.
 * Reached by fall-through from the dispatch code in scrypt_core_3way.
 *
 * Lanes:  A = %xmm0-%xmm3, B = %xmm8-%xmm11, C = %xmm12-%xmm15.
 * Frame:  lane A at 0(%rsp), lane B at 128(%rsp), lane C at 256(%rsp); in
 *         each 128-byte slot the first state half is at +0 and the second
 *         half at +64.
 * Scratch: every iteration occupies 3 * 128 bytes, lane A first, then B,
 *         then C.  As in the single-lane SSE2 path, the stored first half
 *         is already XORed with the second half.
 * scrypt_shuffle is defined elsewhere in this file; it is invoked as
 * (src, src offset, dst, dst offset) on 64-byte pieces and presumably
 * converts between the memory order and the SIMD register order.
 */
scrypt_core_3way_avx:
scrypt_shuffle %rdi, 0, %rsp, 0
scrypt_shuffle %rdi, 64, %rsp, 64
scrypt_shuffle %rdi, 128, %rsp, 128
scrypt_shuffle %rdi, 192, %rsp, 192
scrypt_shuffle %rdi, 256, %rsp, 256
scrypt_shuffle %rdi, 320, %rsp, 320
/* Registers start out holding the second half of each lane. */
movdqa 64(%rsp), %xmm0
movdqa 80(%rsp), %xmm1
movdqa 96(%rsp), %xmm2
movdqa 112(%rsp), %xmm3
movdqa 128+64(%rsp), %xmm8
movdqa 128+80(%rsp), %xmm9
movdqa 128+96(%rsp), %xmm10
movdqa 128+112(%rsp), %xmm11
movdqa 256+64(%rsp), %xmm12
movdqa 256+80(%rsp), %xmm13
movdqa 256+96(%rsp), %xmm14
movdqa 256+112(%rsp), %xmm15
/* %rbx = write cursor, %rax = scratch base + count * 3 * 128 (end). */
movq %rsi, %rbx
leaq (%r8, %r8, 2), %rax
shlq $7, %rax
addq %rsi, %rax
/*
 * Loop 1: fill the scratch area.  The end test follows the body, so the
 * count is expected to be non-zero.
 */
scrypt_core_3way_avx_loop1:
/* Store each second half, then turn the registers into first ^ second. */
movdqa %xmm0, 64(%rbx)
movdqa %xmm1, 80(%rbx)
movdqa %xmm2, 96(%rbx)
movdqa %xmm3, 112(%rbx)
pxor 0(%rsp), %xmm0
pxor 16(%rsp), %xmm1
pxor 32(%rsp), %xmm2
pxor 48(%rsp), %xmm3
movdqa %xmm8, 128+64(%rbx)
movdqa %xmm9, 128+80(%rbx)
movdqa %xmm10, 128+96(%rbx)
movdqa %xmm11, 128+112(%rbx)
pxor 128+0(%rsp), %xmm8
pxor 128+16(%rsp), %xmm9
pxor 128+32(%rsp), %xmm10
pxor 128+48(%rsp), %xmm11
movdqa %xmm12, 256+64(%rbx)
movdqa %xmm13, 256+80(%rbx)
movdqa %xmm14, 256+96(%rbx)
movdqa %xmm15, 256+112(%rbx)
pxor 256+0(%rsp), %xmm12
pxor 256+16(%rsp), %xmm13
pxor 256+32(%rsp), %xmm14
pxor 256+48(%rsp), %xmm15
/* Store first ^ second; it doubles as the saved Salsa input below. */
movdqa %xmm0, 0(%rbx)
movdqa %xmm1, 16(%rbx)
movdqa %xmm2, 32(%rbx)
movdqa %xmm3, 48(%rbx)
movdqa %xmm8, 128+0(%rbx)
movdqa %xmm9, 128+16(%rbx)
movdqa %xmm10, 128+32(%rbx)
movdqa %xmm11, 128+48(%rbx)
movdqa %xmm12, 256+0(%rbx)
movdqa %xmm13, 256+16(%rbx)
movdqa %xmm14, 256+32(%rbx)
movdqa %xmm15, 256+48(%rbx)
salsa8_core_3way_avx
/* Add the Salsa input back: this is the new first half. */
paddd 0(%rbx), %xmm0
paddd 16(%rbx), %xmm1
paddd 32(%rbx), %xmm2
paddd 48(%rbx), %xmm3
paddd 128+0(%rbx), %xmm8
paddd 128+16(%rbx), %xmm9
paddd 128+32(%rbx), %xmm10
paddd 128+48(%rbx), %xmm11
paddd 256+0(%rbx), %xmm12
paddd 256+16(%rbx), %xmm13
paddd 256+32(%rbx), %xmm14
paddd 256+48(%rbx), %xmm15
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
/* XOR with the old second half (just stored to scratch) and mix again. */
pxor 64(%rbx), %xmm0
pxor 80(%rbx), %xmm1
pxor 96(%rbx), %xmm2
pxor 112(%rbx), %xmm3
pxor 128+64(%rbx), %xmm8
pxor 128+80(%rbx), %xmm9
pxor 128+96(%rbx), %xmm10
pxor 128+112(%rbx), %xmm11
pxor 256+64(%rbx), %xmm12
pxor 256+80(%rbx), %xmm13
pxor 256+96(%rbx), %xmm14
pxor 256+112(%rbx), %xmm15
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
salsa8_core_3way_avx
/* New second half stays in the registers for the next iteration. */
paddd 64(%rsp), %xmm0
paddd 80(%rsp), %xmm1
paddd 96(%rsp), %xmm2
paddd 112(%rsp), %xmm3
paddd 128+64(%rsp), %xmm8
paddd 128+80(%rsp), %xmm9
paddd 128+96(%rsp), %xmm10
paddd 128+112(%rsp), %xmm11
paddd 256+64(%rsp), %xmm12
paddd 256+80(%rsp), %xmm13
paddd 256+96(%rsp), %xmm14
paddd 256+112(%rsp), %xmm15
addq $3*128, %rbx
cmpq %rax, %rbx
jne scrypt_core_3way_avx_loop1
/* Spill the second halves so loop 2 can reload them from the frame. */
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
/*
 * %rcx = iteration count, %r8 = count - 1 used as an index mask (so the
 * count is presumably a power of two).
 */
movq %r8, %rcx
subq $1, %r8
/* Loop 2: per lane, XOR in the scratch entry selected by the state. */
scrypt_core_3way_avx_loop2:
/* Dword 0 of each lane's second half selects that lane's entry. */
movd %xmm0, %ebp
movd %xmm8, %ebx
movd %xmm12, %eax
pxor 0(%rsp), %xmm0
pxor 16(%rsp), %xmm1
pxor 32(%rsp), %xmm2
pxor 48(%rsp), %xmm3
pxor 128+0(%rsp), %xmm8
pxor 128+16(%rsp), %xmm9
pxor 128+32(%rsp), %xmm10
pxor 128+48(%rsp), %xmm11
pxor 256+0(%rsp), %xmm12
pxor 256+16(%rsp), %xmm13
pxor 256+32(%rsp), %xmm14
pxor 256+48(%rsp), %xmm15
/*
 * Byte offset = ((index & mask) * 3 + lane) * 128: the three lanes are
 * interleaved in the scratch area, hence the multiply by 3 and the +1 / +2
 * displacements for lanes B and C.
 */
andl %r8d, %ebp
leaq (%rbp, %rbp, 2), %rbp
shll $7, %ebp
andl %r8d, %ebx
leaq 1(%rbx, %rbx, 2), %rbx
shll $7, %ebx
andl %r8d, %eax
leaq 2(%rax, %rax, 2), %rax
shll $7, %eax
pxor 0(%rsi, %rbp), %xmm0
pxor 16(%rsi, %rbp), %xmm1
pxor 32(%rsi, %rbp), %xmm2
pxor 48(%rsi, %rbp), %xmm3
pxor 0(%rsi, %rbx), %xmm8
pxor 16(%rsi, %rbx), %xmm9
pxor 32(%rsi, %rbx), %xmm10
pxor 48(%rsi, %rbx), %xmm11
pxor 0(%rsi, %rax), %xmm12
pxor 16(%rsi, %rax), %xmm13
pxor 32(%rsi, %rax), %xmm14
pxor 48(%rsi, %rax), %xmm15
/* Save the Salsa input so it can be added back afterwards. */
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
salsa8_core_3way_avx
paddd 0(%rsp), %xmm0
paddd 16(%rsp), %xmm1
paddd 32(%rsp), %xmm2
paddd 48(%rsp), %xmm3
paddd 128+0(%rsp), %xmm8
paddd 128+16(%rsp), %xmm9
paddd 128+32(%rsp), %xmm10
paddd 128+48(%rsp), %xmm11
paddd 256+0(%rsp), %xmm12
paddd 256+16(%rsp), %xmm13
paddd 256+32(%rsp), %xmm14
paddd 256+48(%rsp), %xmm15
/* New first half goes back to the frame. */
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
/* XOR in the stored second half and the previous second half. */
pxor 64(%rsi, %rbp), %xmm0
pxor 80(%rsi, %rbp), %xmm1
pxor 96(%rsi, %rbp), %xmm2
pxor 112(%rsi, %rbp), %xmm3
pxor 64(%rsi, %rbx), %xmm8
pxor 80(%rsi, %rbx), %xmm9
pxor 96(%rsi, %rbx), %xmm10
pxor 112(%rsi, %rbx), %xmm11
pxor 64(%rsi, %rax), %xmm12
pxor 80(%rsi, %rax), %xmm13
pxor 96(%rsi, %rax), %xmm14
pxor 112(%rsi, %rax), %xmm15
pxor 64(%rsp), %xmm0
pxor 80(%rsp), %xmm1
pxor 96(%rsp), %xmm2
pxor 112(%rsp), %xmm3
pxor 128+64(%rsp), %xmm8
pxor 128+80(%rsp), %xmm9
pxor 128+96(%rsp), %xmm10
pxor 128+112(%rsp), %xmm11
pxor 256+64(%rsp), %xmm12
pxor 256+80(%rsp), %xmm13
pxor 256+96(%rsp), %xmm14
pxor 256+112(%rsp), %xmm15
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
salsa8_core_3way_avx
paddd 64(%rsp), %xmm0
paddd 80(%rsp), %xmm1
paddd 96(%rsp), %xmm2
paddd 112(%rsp), %xmm3
paddd 128+64(%rsp), %xmm8
paddd 128+80(%rsp), %xmm9
paddd 128+96(%rsp), %xmm10
paddd 128+112(%rsp), %xmm11
paddd 256+64(%rsp), %xmm12
paddd 256+80(%rsp), %xmm13
paddd 256+96(%rsp), %xmm14
paddd 256+112(%rsp), %xmm15
/* New second half: kept in registers and mirrored to the frame. */
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
subq $1, %rcx
ja scrypt_core_3way_avx_loop2
/* Copy the three states from the frame back to the caller's buffer. */
scrypt_shuffle %rsp, 0, %rdi, 0
scrypt_shuffle %rsp, 64, %rdi, 64
scrypt_shuffle %rsp, 128, %rdi, 128
scrypt_shuffle %rsp, 192, %rdi, 192
scrypt_shuffle %rsp, 256, %rdi, 256
scrypt_shuffle %rsp, 320, %rdi, 320
scrypt_core_3way_cleanup
ret
#if defined(USE_XOP)
/*
 * salsa8_core_3way_xop_doubleround: one Salsa double-round applied to three
 * independent states at once, using the XOP vprotd instruction to rotate in
 * a single step instead of a shift pair.
 *
 *   state A: %xmm0-%xmm3     temporary: %xmm4
 *   state B: %xmm8-%xmm11    temporary: %xmm6
 *   state C: %xmm12-%xmm15   temporary: %xmm7
 *
 * Each step is add, rotate left (7, 9, 13, 18), XOR into the destination.
 * vpshufd $0x93 / $0x4e / $0x39 rotate the dwords inside a register between
 * steps.
 */
.macro salsa8_core_3way_xop_doubleround
/* First half, rotate by 7. */
vpaddd %xmm0, %xmm1, %xmm4
vpaddd %xmm8, %xmm9, %xmm6
vpaddd %xmm12, %xmm13, %xmm7
vprotd $7, %xmm4, %xmm4
vprotd $7, %xmm6, %xmm6
vprotd $7, %xmm7, %xmm7
vpxor %xmm4, %xmm3, %xmm3
vpxor %xmm6, %xmm11, %xmm11
vpxor %xmm7, %xmm15, %xmm15
/* First half, rotate by 9. */
vpaddd %xmm3, %xmm0, %xmm4
vpaddd %xmm11, %xmm8, %xmm6
vpaddd %xmm15, %xmm12, %xmm7
vprotd $9, %xmm4, %xmm4
vprotd $9, %xmm6, %xmm6
vprotd $9, %xmm7, %xmm7
vpxor %xmm4, %xmm2, %xmm2
vpxor %xmm6, %xmm10, %xmm10
vpxor %xmm7, %xmm14, %xmm14
/* First half, rotate by 13. */
vpaddd %xmm2, %xmm3, %xmm4
vpaddd %xmm10, %xmm11, %xmm6
vpaddd %xmm14, %xmm15, %xmm7
vprotd $13, %xmm4, %xmm4
vprotd $13, %xmm6, %xmm6
vprotd $13, %xmm7, %xmm7
vpshufd $0x93, %xmm3, %xmm3
vpshufd $0x93, %xmm11, %xmm11
vpshufd $0x93, %xmm15, %xmm15
vpxor %xmm4, %xmm1, %xmm1
vpxor %xmm6, %xmm9, %xmm9
vpxor %xmm7, %xmm13, %xmm13
/* First half, rotate by 18. */
vpaddd %xmm1, %xmm2, %xmm4
vpaddd %xmm9, %xmm10, %xmm6
vpaddd %xmm13, %xmm14, %xmm7
vprotd $18, %xmm4, %xmm4
vprotd $18, %xmm6, %xmm6
vprotd $18, %xmm7, %xmm7
vpshufd $0x4e, %xmm2, %xmm2
vpshufd $0x4e, %xmm10, %xmm10
vpshufd $0x4e, %xmm14, %xmm14
vpxor %xmm6, %xmm8, %xmm8
vpxor %xmm4, %xmm0, %xmm0
vpxor %xmm7, %xmm12, %xmm12
/* Second half, rotate by 7 (roles of registers 1 and 3 exchanged). */
vpaddd %xmm0, %xmm3, %xmm4
vpaddd %xmm8, %xmm11, %xmm6
vpaddd %xmm12, %xmm15, %xmm7
vprotd $7, %xmm4, %xmm4
vprotd $7, %xmm6, %xmm6
vprotd $7, %xmm7, %xmm7
vpshufd $0x39, %xmm1, %xmm1
vpshufd $0x39, %xmm9, %xmm9
vpshufd $0x39, %xmm13, %xmm13
vpxor %xmm4, %xmm1, %xmm1
vpxor %xmm6, %xmm9, %xmm9
vpxor %xmm7, %xmm13, %xmm13
/* Second half, rotate by 9. */
vpaddd %xmm1, %xmm0, %xmm4
vpaddd %xmm9, %xmm8, %xmm6
vpaddd %xmm13, %xmm12, %xmm7
vprotd $9, %xmm4, %xmm4
vprotd $9, %xmm6, %xmm6
vprotd $9, %xmm7, %xmm7
vpxor %xmm4, %xmm2, %xmm2
vpxor %xmm6, %xmm10, %xmm10
vpxor %xmm7, %xmm14, %xmm14
/* Second half, rotate by 13. */
vpaddd %xmm2, %xmm1, %xmm4
vpaddd %xmm10, %xmm9, %xmm6
vpaddd %xmm14, %xmm13, %xmm7
vprotd $13, %xmm4, %xmm4
vprotd $13, %xmm6, %xmm6
vprotd $13, %xmm7, %xmm7
vpshufd $0x93, %xmm1, %xmm1
vpshufd $0x93, %xmm9, %xmm9
vpshufd $0x93, %xmm13, %xmm13
vpxor %xmm4, %xmm3, %xmm3
vpxor %xmm6, %xmm11, %xmm11
vpxor %xmm7, %xmm15, %xmm15
/* Second half, rotate by 18, plus the final lane rotations. */
vpaddd %xmm3, %xmm2, %xmm4
vpaddd %xmm11, %xmm10, %xmm6
vpaddd %xmm15, %xmm14, %xmm7
vprotd $18, %xmm4, %xmm4
vprotd $18, %xmm6, %xmm6
vprotd $18, %xmm7, %xmm7
vpshufd $0x4e, %xmm2, %xmm2
vpshufd $0x4e, %xmm10, %xmm10
vpshufd $0x4e, %xmm14, %xmm14
vpxor %xmm4, %xmm0, %xmm0
vpxor %xmm6, %xmm8, %xmm8
vpxor %xmm7, %xmm12, %xmm12
vpshufd $0x39, %xmm3, %xmm3
vpshufd $0x39, %xmm11, %xmm11
vpshufd $0x39, %xmm15, %xmm15
.endm
/* salsa8_core_3way_xop: four double-rounds on all three states. */
.macro salsa8_core_3way_xop
salsa8_core_3way_xop_doubleround
salsa8_core_3way_xop_doubleround
salsa8_core_3way_xop_doubleround
salsa8_core_3way_xop_doubleround
.endm
/*
 * scrypt_core_3way_xop: 3-way scrypt core using the XOP Salsa macro.  Apart
 * from the macro it invokes, the instruction sequence is the same as in
 * scrypt_core_3way_avx.
 *
 * Lanes:  A = %xmm0-%xmm3, B = %xmm8-%xmm11, C = %xmm12-%xmm15.
 * Frame:  lane A at 0(%rsp), lane B at 128(%rsp), lane C at 256(%rsp); in
 *         each 128-byte slot the first state half is at +0 and the second
 *         half at +64.
 * Scratch: every iteration occupies 3 * 128 bytes, lane A first, then B,
 *         then C; the stored first half is already XORed with the second
 *         half.
 * scrypt_shuffle is defined elsewhere in this file; it is invoked as
 * (src, src offset, dst, dst offset) on 64-byte pieces and presumably
 * converts between the memory order and the SIMD register order.
 */
.p2align 6
scrypt_core_3way_xop:
scrypt_shuffle %rdi, 0, %rsp, 0
scrypt_shuffle %rdi, 64, %rsp, 64
scrypt_shuffle %rdi, 128, %rsp, 128
scrypt_shuffle %rdi, 192, %rsp, 192
scrypt_shuffle %rdi, 256, %rsp, 256
scrypt_shuffle %rdi, 320, %rsp, 320
/* Registers start out holding the second half of each lane. */
movdqa 64(%rsp), %xmm0
movdqa 80(%rsp), %xmm1
movdqa 96(%rsp), %xmm2
movdqa 112(%rsp), %xmm3
movdqa 128+64(%rsp), %xmm8
movdqa 128+80(%rsp), %xmm9
movdqa 128+96(%rsp), %xmm10
movdqa 128+112(%rsp), %xmm11
movdqa 256+64(%rsp), %xmm12
movdqa 256+80(%rsp), %xmm13
movdqa 256+96(%rsp), %xmm14
movdqa 256+112(%rsp), %xmm15
/* %rbx = write cursor, %rax = scratch base + count * 3 * 128 (end). */
movq %rsi, %rbx
leaq (%r8, %r8, 2), %rax
shlq $7, %rax
addq %rsi, %rax
/*
 * Loop 1: fill the scratch area.  The end test follows the body, so the
 * count is expected to be non-zero.
 */
scrypt_core_3way_xop_loop1:
/* Store each second half, then turn the registers into first ^ second. */
movdqa %xmm0, 64(%rbx)
movdqa %xmm1, 80(%rbx)
movdqa %xmm2, 96(%rbx)
movdqa %xmm3, 112(%rbx)
pxor 0(%rsp), %xmm0
pxor 16(%rsp), %xmm1
pxor 32(%rsp), %xmm2
pxor 48(%rsp), %xmm3
movdqa %xmm8, 128+64(%rbx)
movdqa %xmm9, 128+80(%rbx)
movdqa %xmm10, 128+96(%rbx)
movdqa %xmm11, 128+112(%rbx)
pxor 128+0(%rsp), %xmm8
pxor 128+16(%rsp), %xmm9
pxor 128+32(%rsp), %xmm10
pxor 128+48(%rsp), %xmm11
movdqa %xmm12, 256+64(%rbx)
movdqa %xmm13, 256+80(%rbx)
movdqa %xmm14, 256+96(%rbx)
movdqa %xmm15, 256+112(%rbx)
pxor 256+0(%rsp), %xmm12
pxor 256+16(%rsp), %xmm13
pxor 256+32(%rsp), %xmm14
pxor 256+48(%rsp), %xmm15
/* Store first ^ second; it doubles as the saved Salsa input below. */
movdqa %xmm0, 0(%rbx)
movdqa %xmm1, 16(%rbx)
movdqa %xmm2, 32(%rbx)
movdqa %xmm3, 48(%rbx)
movdqa %xmm8, 128+0(%rbx)
movdqa %xmm9, 128+16(%rbx)
movdqa %xmm10, 128+32(%rbx)
movdqa %xmm11, 128+48(%rbx)
movdqa %xmm12, 256+0(%rbx)
movdqa %xmm13, 256+16(%rbx)
movdqa %xmm14, 256+32(%rbx)
movdqa %xmm15, 256+48(%rbx)
salsa8_core_3way_xop
/* Add the Salsa input back: this is the new first half. */
paddd 0(%rbx), %xmm0
paddd 16(%rbx), %xmm1
paddd 32(%rbx), %xmm2
paddd 48(%rbx), %xmm3
paddd 128+0(%rbx), %xmm8
paddd 128+16(%rbx), %xmm9
paddd 128+32(%rbx), %xmm10
paddd 128+48(%rbx), %xmm11
paddd 256+0(%rbx), %xmm12
paddd 256+16(%rbx), %xmm13
paddd 256+32(%rbx), %xmm14
paddd 256+48(%rbx), %xmm15
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
/* XOR with the old second half (just stored to scratch) and mix again. */
pxor 64(%rbx), %xmm0
pxor 80(%rbx), %xmm1
pxor 96(%rbx), %xmm2
pxor 112(%rbx), %xmm3
pxor 128+64(%rbx), %xmm8
pxor 128+80(%rbx), %xmm9
pxor 128+96(%rbx), %xmm10
pxor 128+112(%rbx), %xmm11
pxor 256+64(%rbx), %xmm12
pxor 256+80(%rbx), %xmm13
pxor 256+96(%rbx), %xmm14
pxor 256+112(%rbx), %xmm15
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
salsa8_core_3way_xop
/* New second half stays in the registers for the next iteration. */
paddd 64(%rsp), %xmm0
paddd 80(%rsp), %xmm1
paddd 96(%rsp), %xmm2
paddd 112(%rsp), %xmm3
paddd 128+64(%rsp), %xmm8
paddd 128+80(%rsp), %xmm9
paddd 128+96(%rsp), %xmm10
paddd 128+112(%rsp), %xmm11
paddd 256+64(%rsp), %xmm12
paddd 256+80(%rsp), %xmm13
paddd 256+96(%rsp), %xmm14
paddd 256+112(%rsp), %xmm15
addq $3*128, %rbx
cmpq %rax, %rbx
jne scrypt_core_3way_xop_loop1
/* Spill the second halves so loop 2 can reload them from the frame. */
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
/*
 * %rcx = iteration count, %r8 = count - 1 used as an index mask (so the
 * count is presumably a power of two).
 */
movq %r8, %rcx
subq $1, %r8
/* Loop 2: per lane, XOR in the scratch entry selected by the state. */
scrypt_core_3way_xop_loop2:
/* Dword 0 of each lane's second half selects that lane's entry. */
movd %xmm0, %ebp
movd %xmm8, %ebx
movd %xmm12, %eax
pxor 0(%rsp), %xmm0
pxor 16(%rsp), %xmm1
pxor 32(%rsp), %xmm2
pxor 48(%rsp), %xmm3
pxor 128+0(%rsp), %xmm8
pxor 128+16(%rsp), %xmm9
pxor 128+32(%rsp), %xmm10
pxor 128+48(%rsp), %xmm11
pxor 256+0(%rsp), %xmm12
pxor 256+16(%rsp), %xmm13
pxor 256+32(%rsp), %xmm14
pxor 256+48(%rsp), %xmm15
/*
 * Byte offset = ((index & mask) * 3 + lane) * 128: the three lanes are
 * interleaved in the scratch area, hence the multiply by 3 and the +1 / +2
 * displacements for lanes B and C.
 */
andl %r8d, %ebp
leaq (%rbp, %rbp, 2), %rbp
shll $7, %ebp
andl %r8d, %ebx
leaq 1(%rbx, %rbx, 2), %rbx
shll $7, %ebx
andl %r8d, %eax
leaq 2(%rax, %rax, 2), %rax
shll $7, %eax
pxor 0(%rsi, %rbp), %xmm0
pxor 16(%rsi, %rbp), %xmm1
pxor 32(%rsi, %rbp), %xmm2
pxor 48(%rsi, %rbp), %xmm3
pxor 0(%rsi, %rbx), %xmm8
pxor 16(%rsi, %rbx), %xmm9
pxor 32(%rsi, %rbx), %xmm10
pxor 48(%rsi, %rbx), %xmm11
pxor 0(%rsi, %rax), %xmm12
pxor 16(%rsi, %rax), %xmm13
pxor 32(%rsi, %rax), %xmm14
pxor 48(%rsi, %rax), %xmm15
/* Save the Salsa input so it can be added back afterwards. */
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
salsa8_core_3way_xop
paddd 0(%rsp), %xmm0
paddd 16(%rsp), %xmm1
paddd 32(%rsp), %xmm2
paddd 48(%rsp), %xmm3
paddd 128+0(%rsp), %xmm8
paddd 128+16(%rsp), %xmm9
paddd 128+32(%rsp), %xmm10
paddd 128+48(%rsp), %xmm11
paddd 256+0(%rsp), %xmm12
paddd 256+16(%rsp), %xmm13
paddd 256+32(%rsp), %xmm14
paddd 256+48(%rsp), %xmm15
/* New first half goes back to the frame. */
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
/* XOR in the stored second half and the previous second half. */
pxor 64(%rsi, %rbp), %xmm0
pxor 80(%rsi, %rbp), %xmm1
pxor 96(%rsi, %rbp), %xmm2
pxor 112(%rsi, %rbp), %xmm3
pxor 64(%rsi, %rbx), %xmm8
pxor 80(%rsi, %rbx), %xmm9
pxor 96(%rsi, %rbx), %xmm10
pxor 112(%rsi, %rbx), %xmm11
pxor 64(%rsi, %rax), %xmm12
pxor 80(%rsi, %rax), %xmm13
pxor 96(%rsi, %rax), %xmm14
pxor 112(%rsi, %rax), %xmm15
pxor 64(%rsp), %xmm0
pxor 80(%rsp), %xmm1
pxor 96(%rsp), %xmm2
pxor 112(%rsp), %xmm3
pxor 128+64(%rsp), %xmm8
pxor 128+80(%rsp), %xmm9
pxor 128+96(%rsp), %xmm10
pxor 128+112(%rsp), %xmm11
pxor 256+64(%rsp), %xmm12
pxor 256+80(%rsp), %xmm13
pxor 256+96(%rsp), %xmm14
pxor 256+112(%rsp), %xmm15
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
salsa8_core_3way_xop
paddd 64(%rsp), %xmm0
paddd 80(%rsp), %xmm1
paddd 96(%rsp), %xmm2
paddd 112(%rsp), %xmm3
paddd 128+64(%rsp), %xmm8
paddd 128+80(%rsp), %xmm9
paddd 128+96(%rsp), %xmm10
paddd 128+112(%rsp), %xmm11
paddd 256+64(%rsp), %xmm12
paddd 256+80(%rsp), %xmm13
paddd 256+96(%rsp), %xmm14
paddd 256+112(%rsp), %xmm15
/* New second half: kept in registers and mirrored to the frame. */
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
subq $1, %rcx
ja scrypt_core_3way_xop_loop2
/* Copy the three states from the frame back to the caller's buffer. */
scrypt_shuffle %rsp, 0, %rdi, 0
scrypt_shuffle %rsp, 64, %rdi, 64
scrypt_shuffle %rsp, 128, %rdi, 128
scrypt_shuffle %rsp, 192, %rdi, 192
scrypt_shuffle %rsp, 256, %rdi, 256
scrypt_shuffle %rsp, 320, %rdi, 320
scrypt_core_3way_cleanup
ret
#endif /* USE_XOP */
#endif /* USE_AVX */
/*
 * salsa8_core_3way_xmm_doubleround
 *
 * One double round (going by the macro name, of the Salsa20 core) applied to
 * three independent states at once, using only SSE2 instructions.
 *
 *   state A: xmm0..xmm3    state B: xmm8..xmm11    state C: xmm12..xmm15
 *   scratch: xmm4 (A), xmm6 (B), xmm7 (C), xmm5 (rotate temporary, shared)
 *
 * A 32-bit rotate-left by r is built from pslld $r, psrld $(32-r) and two
 * pxor into the destination. The pshufd immediates 0x93, 0x4e and 0x39
 * rotate the four dwords inside a register by one, two and three positions.
 * Registers 1..3 of each state are shuffled once in each half; 0x93 and 0x39
 * undo each other and 0x4e undoes itself, so the dword layout on exit is the
 * same as on entry.
 *
 * The instruction order is interleaved across the three states: the copy
 * that feeds the next addition is always taken before the pshufd that
 * rearranges its source, so the order must not be changed casually.
 */
.macro salsa8_core_3way_xmm_doubleround
/* Half 1, step 1: xmm3/xmm11/xmm15 ^= rotl32(xmm1+xmm0 / xmm9+xmm8 / xmm13+xmm12, 7). */
movdqa %xmm1, %xmm4
movdqa %xmm9, %xmm6
movdqa %xmm13, %xmm7
paddd %xmm0, %xmm4
paddd %xmm8, %xmm6
paddd %xmm12, %xmm7
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm3
pxor %xmm5, %xmm3
movdqa %xmm0, %xmm4
movdqa %xmm6, %xmm5
pslld $7, %xmm6
psrld $25, %xmm5
pxor %xmm6, %xmm11
pxor %xmm5, %xmm11
movdqa %xmm8, %xmm6
movdqa %xmm7, %xmm5
pslld $7, %xmm7
psrld $25, %xmm5
pxor %xmm7, %xmm15
pxor %xmm5, %xmm15
movdqa %xmm12, %xmm7
/* Half 1, step 2: xmm2/xmm10/xmm14 ^= rotl32(xmm0+xmm3 / xmm8+xmm11 / xmm12+xmm15, 9);
   xmm3/xmm11/xmm15 are copied for step 3 and then shuffled with 0x93. */
paddd %xmm3, %xmm4
paddd %xmm11, %xmm6
paddd %xmm15, %xmm7
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm3, %xmm4
pshufd $0x93, %xmm3, %xmm3
pxor %xmm5, %xmm2
movdqa %xmm6, %xmm5
pslld $9, %xmm6
psrld $23, %xmm5
pxor %xmm6, %xmm10
movdqa %xmm11, %xmm6
pshufd $0x93, %xmm11, %xmm11
pxor %xmm5, %xmm10
movdqa %xmm7, %xmm5
pslld $9, %xmm7
psrld $23, %xmm5
pxor %xmm7, %xmm14
movdqa %xmm15, %xmm7
pxor %xmm5, %xmm14
pshufd $0x93, %xmm15, %xmm15
/* Half 1, step 3: xmm1/xmm9/xmm13 ^= rotl32(unshuffled xmm3/xmm11/xmm15 + xmm2/xmm10/xmm14, 13);
   xmm2/xmm10/xmm14 are copied for step 4 and then shuffled with 0x4e. */
paddd %xmm2, %xmm4
paddd %xmm10, %xmm6
paddd %xmm14, %xmm7
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm2, %xmm4
pshufd $0x4e, %xmm2, %xmm2
pxor %xmm5, %xmm1
movdqa %xmm6, %xmm5
pslld $13, %xmm6
psrld $19, %xmm5
pxor %xmm6, %xmm9
movdqa %xmm10, %xmm6
pshufd $0x4e, %xmm10, %xmm10
pxor %xmm5, %xmm9
movdqa %xmm7, %xmm5
pslld $13, %xmm7
psrld $19, %xmm5
pxor %xmm7, %xmm13
movdqa %xmm14, %xmm7
pshufd $0x4e, %xmm14, %xmm14
pxor %xmm5, %xmm13
/* Half 1, step 4: xmm0/xmm8/xmm12 ^= rotl32(unshuffled xmm2/xmm10/xmm14 + xmm1/xmm9/xmm13, 18);
   xmm1/xmm9/xmm13 are shuffled with 0x39 and the (already shuffled)
   xmm3/xmm11/xmm15 are copied as the first addend of half 2. */
paddd %xmm1, %xmm4
paddd %xmm9, %xmm6
paddd %xmm13, %xmm7
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
pshufd $0x39, %xmm1, %xmm1
pxor %xmm5, %xmm0
movdqa %xmm3, %xmm4
movdqa %xmm6, %xmm5
pslld $18, %xmm6
psrld $14, %xmm5
pxor %xmm6, %xmm8
pshufd $0x39, %xmm9, %xmm9
pxor %xmm5, %xmm8
movdqa %xmm11, %xmm6
movdqa %xmm7, %xmm5
pslld $18, %xmm7
psrld $14, %xmm5
pxor %xmm7, %xmm12
movdqa %xmm15, %xmm7
pxor %xmm5, %xmm12
pshufd $0x39, %xmm13, %xmm13
/* Half 2, step 1: same pattern with the roles of registers 1 and 3 swapped:
   xmm1/xmm9/xmm13 ^= rotl32(xmm3+xmm0 / xmm11+xmm8 / xmm15+xmm12, 7). */
paddd %xmm0, %xmm4
paddd %xmm8, %xmm6
paddd %xmm12, %xmm7
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm1
pxor %xmm5, %xmm1
movdqa %xmm0, %xmm4
movdqa %xmm6, %xmm5
pslld $7, %xmm6
psrld $25, %xmm5
pxor %xmm6, %xmm9
pxor %xmm5, %xmm9
movdqa %xmm8, %xmm6
movdqa %xmm7, %xmm5
pslld $7, %xmm7
psrld $25, %xmm5
pxor %xmm7, %xmm13
pxor %xmm5, %xmm13
movdqa %xmm12, %xmm7
/* Half 2, step 2: xmm2/xmm10/xmm14 ^= rotl32(xmm0+xmm1 / xmm8+xmm9 / xmm12+xmm13, 9);
   xmm1/xmm9/xmm13 are copied and then shuffled with 0x93. */
paddd %xmm1, %xmm4
paddd %xmm9, %xmm6
paddd %xmm13, %xmm7
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm1, %xmm4
pshufd $0x93, %xmm1, %xmm1
pxor %xmm5, %xmm2
movdqa %xmm6, %xmm5
pslld $9, %xmm6
psrld $23, %xmm5
pxor %xmm6, %xmm10
movdqa %xmm9, %xmm6
pshufd $0x93, %xmm9, %xmm9
pxor %xmm5, %xmm10
movdqa %xmm7, %xmm5
pslld $9, %xmm7
psrld $23, %xmm5
pxor %xmm7, %xmm14
movdqa %xmm13, %xmm7
pshufd $0x93, %xmm13, %xmm13
pxor %xmm5, %xmm14
/* Half 2, step 3: xmm3/xmm11/xmm15 ^= rotl32(unshuffled xmm1/xmm9/xmm13 + xmm2/xmm10/xmm14, 13);
   xmm2/xmm10/xmm14 are copied and then shuffled with 0x4e. */
paddd %xmm2, %xmm4
paddd %xmm10, %xmm6
paddd %xmm14, %xmm7
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm2, %xmm4
pshufd $0x4e, %xmm2, %xmm2
pxor %xmm5, %xmm3
movdqa %xmm6, %xmm5
pslld $13, %xmm6
psrld $19, %xmm5
pxor %xmm6, %xmm11
movdqa %xmm10, %xmm6
pshufd $0x4e, %xmm10, %xmm10
pxor %xmm5, %xmm11
movdqa %xmm7, %xmm5
pslld $13, %xmm7
psrld $19, %xmm5
pxor %xmm7, %xmm15
movdqa %xmm14, %xmm7
pshufd $0x4e, %xmm14, %xmm14
pxor %xmm5, %xmm15
/* Half 2, step 4: xmm0/xmm8/xmm12 ^= rotl32(unshuffled xmm2/xmm10/xmm14 + xmm3/xmm11/xmm15, 18);
   xmm3/xmm11/xmm15 are shuffled with 0x39, restoring the entry layout. */
paddd %xmm3, %xmm4
paddd %xmm11, %xmm6
paddd %xmm15, %xmm7
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
pshufd $0x39, %xmm3, %xmm3
pxor %xmm5, %xmm0
movdqa %xmm6, %xmm5
pslld $18, %xmm6
psrld $14, %xmm5
pxor %xmm6, %xmm8
pshufd $0x39, %xmm11, %xmm11
pxor %xmm5, %xmm8
movdqa %xmm7, %xmm5
pslld $18, %xmm7
psrld $14, %xmm5
pxor %xmm7, %xmm12
pshufd $0x39, %xmm15, %xmm15
pxor %xmm5, %xmm12
.endm
/*
 * salsa8_core_3way_xmm
 *
 * Expands salsa8_core_3way_xmm_doubleround four times back to back
 * (four double rounds) on the three states held in xmm0..xmm3,
 * xmm8..xmm11 and xmm12..xmm15.
 */
.macro salsa8_core_3way_xmm
.rept 4
salsa8_core_3way_xmm_doubleround
.endr
.endm
/*
 * scrypt_core_3way_xmm -- SSE2 body of the 3-way scrypt core.
 *
 * Registers and memory as used below:
 *   rdi  three 128-byte blocks (lane k at rdi + 128*k); read on entry and
 *        overwritten with the result before returning.
 *   rsi  scratchpad V; entry j of lane k is at rsi + (3*j + k)*128.
 *        Accessed with movdqa/pxor memory operands, so it must be 16-byte
 *        aligned.
 *   r8   iteration count N. N-1 is used as an index mask in the second
 *        loop, so N is presumably a power of two -- confirm against caller.
 *   rsp  384-byte working copy of the three blocks. NOTE(review): the code
 *        that reserves and aligns this area and that saves rbx/rbp is not
 *        part of this block (presumably the entry stub this label is
 *        reached from, paired with scrypt_core_3way_cleanup).
 *
 * Working-copy layout: lane k occupies rsp + 128*k; bytes 0..63 are the
 * first half of the block and bytes 64..127 the second half. Between
 * steps the second half of every lane is kept live in registers
 * (lane 0: xmm0..3, lane 1: xmm8..11, lane 2: xmm12..15).
 *
 * Scratchpad entries store (first half XOR second half, second half)
 * rather than the two halves separately; loop 2 relies on that to fold
 * two XORs into one.
 *
 * Clobbers rax, rbx, rcx, rbp, r8, all xmm registers and the flags.
 *
 * Change from the previous revision: loop 2 used 32-bit shifts
 * (shll $7) to scale the entry index to a byte offset, which truncated
 * the offset once (3*j + k)*128 reached 2^32 (N >= 2^24), while loop 1
 * already computed its end pointer with a 64-bit shift. The scaling is
 * now 64-bit; results for smaller N are unchanged.
 */
.p2align 6
scrypt_core_3way_xmm:
/* Copy the three blocks from rdi into the stack working area, permuting
   the dwords of every 64-byte half with scrypt_shuffle. */
scrypt_shuffle %rdi, 0, %rsp, 0
scrypt_shuffle %rdi, 64, %rsp, 64
scrypt_shuffle %rdi, 128, %rsp, 128
scrypt_shuffle %rdi, 192, %rsp, 192
scrypt_shuffle %rdi, 256, %rsp, 256
scrypt_shuffle %rdi, 320, %rsp, 320
/* Load the second half of each lane into registers. */
movdqa 64(%rsp), %xmm0
movdqa 80(%rsp), %xmm1
movdqa 96(%rsp), %xmm2
movdqa 112(%rsp), %xmm3
movdqa 128+64(%rsp), %xmm8
movdqa 128+80(%rsp), %xmm9
movdqa 128+96(%rsp), %xmm10
movdqa 128+112(%rsp), %xmm11
movdqa 256+64(%rsp), %xmm12
movdqa 256+80(%rsp), %xmm13
movdqa 256+96(%rsp), %xmm14
movdqa 256+112(%rsp), %xmm15
/* rbx = write cursor in V, rax = rsi + N*3*128 = end of V. */
movq %rsi, %rbx
leaq (%r8, %r8, 2), %rax
shlq $7, %rax
addq %rsi, %rax
/* Loop 1: write one scratchpad entry per iteration, then advance the
   blocks by two salsa8 applications. */
scrypt_core_3way_xmm_loop1:
/* Per lane: store the second half into the entry, then XOR in the first
   half from the stack. */
movdqa %xmm0, 64(%rbx)
movdqa %xmm1, 80(%rbx)
movdqa %xmm2, 96(%rbx)
movdqa %xmm3, 112(%rbx)
pxor 0(%rsp), %xmm0
pxor 16(%rsp), %xmm1
pxor 32(%rsp), %xmm2
pxor 48(%rsp), %xmm3
movdqa %xmm8, 128+64(%rbx)
movdqa %xmm9, 128+80(%rbx)
movdqa %xmm10, 128+96(%rbx)
movdqa %xmm11, 128+112(%rbx)
pxor 128+0(%rsp), %xmm8
pxor 128+16(%rsp), %xmm9
pxor 128+32(%rsp), %xmm10
pxor 128+48(%rsp), %xmm11
movdqa %xmm12, 256+64(%rbx)
movdqa %xmm13, 256+80(%rbx)
movdqa %xmm14, 256+96(%rbx)
movdqa %xmm15, 256+112(%rbx)
pxor 256+0(%rsp), %xmm12
pxor 256+16(%rsp), %xmm13
pxor 256+32(%rsp), %xmm14
pxor 256+48(%rsp), %xmm15
/* Store (first XOR second) as the first half of the entry; it is also
   the salsa8 input that is added back after the rounds. */
movdqa %xmm0, 0(%rbx)
movdqa %xmm1, 16(%rbx)
movdqa %xmm2, 32(%rbx)
movdqa %xmm3, 48(%rbx)
movdqa %xmm8, 128+0(%rbx)
movdqa %xmm9, 128+16(%rbx)
movdqa %xmm10, 128+32(%rbx)
movdqa %xmm11, 128+48(%rbx)
movdqa %xmm12, 256+0(%rbx)
movdqa %xmm13, 256+16(%rbx)
movdqa %xmm14, 256+32(%rbx)
movdqa %xmm15, 256+48(%rbx)
salsa8_core_3way_xmm
/* Add the saved input back: registers now hold the new first half. */
paddd 0(%rbx), %xmm0
paddd 16(%rbx), %xmm1
paddd 32(%rbx), %xmm2
paddd 48(%rbx), %xmm3
paddd 128+0(%rbx), %xmm8
paddd 128+16(%rbx), %xmm9
paddd 128+32(%rbx), %xmm10
paddd 128+48(%rbx), %xmm11
paddd 256+0(%rbx), %xmm12
paddd 256+16(%rbx), %xmm13
paddd 256+32(%rbx), %xmm14
paddd 256+48(%rbx), %xmm15
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
/* XOR in the old second half (just stored in the entry) and park the
   result on the stack as the input of the second salsa8. */
pxor 64(%rbx), %xmm0
pxor 80(%rbx), %xmm1
pxor 96(%rbx), %xmm2
pxor 112(%rbx), %xmm3
pxor 128+64(%rbx), %xmm8
pxor 128+80(%rbx), %xmm9
pxor 128+96(%rbx), %xmm10
pxor 128+112(%rbx), %xmm11
pxor 256+64(%rbx), %xmm12
pxor 256+80(%rbx), %xmm13
pxor 256+96(%rbx), %xmm14
pxor 256+112(%rbx), %xmm15
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
salsa8_core_3way_xmm
/* Registers now hold the new second half (kept live across iterations). */
paddd 64(%rsp), %xmm0
paddd 80(%rsp), %xmm1
paddd 96(%rsp), %xmm2
paddd 112(%rsp), %xmm3
paddd 128+64(%rsp), %xmm8
paddd 128+80(%rsp), %xmm9
paddd 128+96(%rsp), %xmm10
paddd 128+112(%rsp), %xmm11
paddd 256+64(%rsp), %xmm12
paddd 256+80(%rsp), %xmm13
paddd 256+96(%rsp), %xmm14
paddd 256+112(%rsp), %xmm15
/* Next entry (three lanes of 128 bytes); stop at the end of V. */
addq $3*128, %rbx
cmpq %rax, %rbx
jne scrypt_core_3way_xmm_loop1
/* Spill the second half so the stack copy is complete before loop 2. */
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
/* rcx = remaining iterations of loop 2, r8 = N-1 (index mask). */
movq %r8, %rcx
subq $1, %r8
/* Loop 2: N data-dependent scratchpad reads. */
scrypt_core_3way_xmm_loop2:
/* The low dword of the second half of each lane selects the entry.
   movd zero-extends into the full 64-bit register. */
movd %xmm0, %ebp
movd %xmm8, %ebx
movd %xmm12, %eax
pxor 0(%rsp), %xmm0
pxor 16(%rsp), %xmm1
pxor 32(%rsp), %xmm2
pxor 48(%rsp), %xmm3
pxor 128+0(%rsp), %xmm8
pxor 128+16(%rsp), %xmm9
pxor 128+32(%rsp), %xmm10
pxor 128+48(%rsp), %xmm11
pxor 256+0(%rsp), %xmm12
pxor 256+16(%rsp), %xmm13
pxor 256+32(%rsp), %xmm14
pxor 256+48(%rsp), %xmm15
/* Byte offset of entry j for lane k: ((j & (N-1))*3 + k) * 128.
   The andl leaves the upper 32 bits zero, and the multiply-by-3 and the
   shift are done in 64 bits so large scratchpads are addressed correctly. */
andl %r8d, %ebp
leaq (%rbp, %rbp, 2), %rbp
shlq $7, %rbp
andl %r8d, %ebx
leaq 1(%rbx, %rbx, 2), %rbx
shlq $7, %rbx
andl %r8d, %eax
leaq 2(%rax, %rax, 2), %rax
shlq $7, %rax
/* XOR in the first half of the selected entries (which already contains
   the entry's second half, see the header comment). */
pxor 0(%rsi, %rbp), %xmm0
pxor 16(%rsi, %rbp), %xmm1
pxor 32(%rsi, %rbp), %xmm2
pxor 48(%rsi, %rbp), %xmm3
pxor 0(%rsi, %rbx), %xmm8
pxor 16(%rsi, %rbx), %xmm9
pxor 32(%rsi, %rbx), %xmm10
pxor 48(%rsi, %rbx), %xmm11
pxor 0(%rsi, %rax), %xmm12
pxor 16(%rsi, %rax), %xmm13
pxor 32(%rsi, %rax), %xmm14
pxor 48(%rsi, %rax), %xmm15
/* Save the salsa8 input so it can be added back afterwards. */
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
salsa8_core_3way_xmm
/* New first half = salsa8 output + input; write it to the stack copy. */
paddd 0(%rsp), %xmm0
paddd 16(%rsp), %xmm1
paddd 32(%rsp), %xmm2
paddd 48(%rsp), %xmm3
paddd 128+0(%rsp), %xmm8
paddd 128+16(%rsp), %xmm9
paddd 128+32(%rsp), %xmm10
paddd 128+48(%rsp), %xmm11
paddd 256+0(%rsp), %xmm12
paddd 256+16(%rsp), %xmm13
paddd 256+32(%rsp), %xmm14
paddd 256+48(%rsp), %xmm15
movdqa %xmm0, 0(%rsp)
movdqa %xmm1, 16(%rsp)
movdqa %xmm2, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm8, 128+0(%rsp)
movdqa %xmm9, 128+16(%rsp)
movdqa %xmm10, 128+32(%rsp)
movdqa %xmm11, 128+48(%rsp)
movdqa %xmm12, 256+0(%rsp)
movdqa %xmm13, 256+16(%rsp)
movdqa %xmm14, 256+32(%rsp)
movdqa %xmm15, 256+48(%rsp)
/* XOR in the second half of the selected entries and the current second
   half from the stack; the result is the input of the second salsa8. */
pxor 64(%rsi, %rbp), %xmm0
pxor 80(%rsi, %rbp), %xmm1
pxor 96(%rsi, %rbp), %xmm2
pxor 112(%rsi, %rbp), %xmm3
pxor 64(%rsi, %rbx), %xmm8
pxor 80(%rsi, %rbx), %xmm9
pxor 96(%rsi, %rbx), %xmm10
pxor 112(%rsi, %rbx), %xmm11
pxor 64(%rsi, %rax), %xmm12
pxor 80(%rsi, %rax), %xmm13
pxor 96(%rsi, %rax), %xmm14
pxor 112(%rsi, %rax), %xmm15
pxor 64(%rsp), %xmm0
pxor 80(%rsp), %xmm1
pxor 96(%rsp), %xmm2
pxor 112(%rsp), %xmm3
pxor 128+64(%rsp), %xmm8
pxor 128+80(%rsp), %xmm9
pxor 128+96(%rsp), %xmm10
pxor 128+112(%rsp), %xmm11
pxor 256+64(%rsp), %xmm12
pxor 256+80(%rsp), %xmm13
pxor 256+96(%rsp), %xmm14
pxor 256+112(%rsp), %xmm15
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
salsa8_core_3way_xmm
/* New second half: kept in registers for the next index extraction and
   also written to the stack copy. */
paddd 64(%rsp), %xmm0
paddd 80(%rsp), %xmm1
paddd 96(%rsp), %xmm2
paddd 112(%rsp), %xmm3
paddd 128+64(%rsp), %xmm8
paddd 128+80(%rsp), %xmm9
paddd 128+96(%rsp), %xmm10
paddd 128+112(%rsp), %xmm11
paddd 256+64(%rsp), %xmm12
paddd 256+80(%rsp), %xmm13
paddd 256+96(%rsp), %xmm14
paddd 256+112(%rsp), %xmm15
movdqa %xmm0, 64(%rsp)
movdqa %xmm1, 80(%rsp)
movdqa %xmm2, 96(%rsp)
movdqa %xmm3, 112(%rsp)
movdqa %xmm8, 128+64(%rsp)
movdqa %xmm9, 128+80(%rsp)
movdqa %xmm10, 128+96(%rsp)
movdqa %xmm11, 128+112(%rsp)
movdqa %xmm12, 256+64(%rsp)
movdqa %xmm13, 256+80(%rsp)
movdqa %xmm14, 256+96(%rsp)
movdqa %xmm15, 256+112(%rsp)
/* ja: loop while the decremented counter is non-zero (no borrow, not zero). */
subq $1, %rcx
ja scrypt_core_3way_xmm_loop2
/* Copy the working area back to rdi through scrypt_shuffle. */
scrypt_shuffle %rsp, 0, %rdi, 0
scrypt_shuffle %rsp, 64, %rdi, 64
scrypt_shuffle %rsp, 128, %rdi, 128
scrypt_shuffle %rsp, 192, %rdi, 192
scrypt_shuffle %rsp, 256, %rdi, 256
scrypt_shuffle %rsp, 320, %rdi, 320
scrypt_core_3way_cleanup
ret
#if defined(USE_AVX2)
/*
 * salsa8_core_6way_avx2_doubleround
 *
 * One double round (going by the macro name, of the Salsa20 core) on six
 * states at once: three groups of ymm registers, each register carrying one
 * state in its low 128 bits and another in its high 128 bits.
 *
 *   group A: ymm0..ymm3    group B: ymm8..ymm11    group C: ymm12..ymm15
 *   scratch: ymm4 (A), ymm6 (B), ymm7 (C), ymm5 (rotate temporary, shared)
 *
 * A 32-bit rotate-left by r is built from vpslld $r, vpsrld $(32-r) and two
 * vpxor into the destination. vpshufd works per 128-bit lane; immediates
 * 0x93, 0x4e and 0x39 rotate the four dwords of each lane by one, two and
 * three positions. Registers 1..3 of each group are shuffled once per half
 * with shuffles that cancel out, so the layout on exit equals the layout on
 * entry. Every sum is formed before the vpshufd that rearranges its source.
 */
.macro salsa8_core_6way_avx2_doubleround
/* Half 1, step 1: ymm3/ymm11/ymm15 ^= rotl32(ymm1+ymm0 / ymm9+ymm8 / ymm13+ymm12, 7). */
vpaddd %ymm0, %ymm1, %ymm4
vpaddd %ymm8, %ymm9, %ymm6
vpaddd %ymm12, %ymm13, %ymm7
vpslld $7, %ymm4, %ymm5
vpsrld $25, %ymm4, %ymm4
vpxor %ymm5, %ymm3, %ymm3
vpxor %ymm4, %ymm3, %ymm3
vpslld $7, %ymm6, %ymm5
vpsrld $25, %ymm6, %ymm6
vpxor %ymm5, %ymm11, %ymm11
vpxor %ymm6, %ymm11, %ymm11
vpslld $7, %ymm7, %ymm5
vpsrld $25, %ymm7, %ymm7
vpxor %ymm5, %ymm15, %ymm15
vpxor %ymm7, %ymm15, %ymm15
/* Half 1, step 2: ymm2/ymm10/ymm14 ^= rotl32(ymm0+ymm3 / ymm8+ymm11 / ymm12+ymm15, 9). */
vpaddd %ymm3, %ymm0, %ymm4
vpaddd %ymm11, %ymm8, %ymm6
vpaddd %ymm15, %ymm12, %ymm7
vpslld $9, %ymm4, %ymm5
vpsrld $23, %ymm4, %ymm4
vpxor %ymm5, %ymm2, %ymm2
vpxor %ymm4, %ymm2, %ymm2
vpslld $9, %ymm6, %ymm5
vpsrld $23, %ymm6, %ymm6
vpxor %ymm5, %ymm10, %ymm10
vpxor %ymm6, %ymm10, %ymm10
vpslld $9, %ymm7, %ymm5
vpsrld $23, %ymm7, %ymm7
vpxor %ymm5, %ymm14, %ymm14
vpxor %ymm7, %ymm14, %ymm14
/* Half 1, step 3: ymm1/ymm9/ymm13 ^= rotl32(ymm3+ymm2 / ymm11+ymm10 / ymm15+ymm14, 13);
   ymm3/ymm11/ymm15 are shuffled with 0x93 after the sums are taken. */
vpaddd %ymm2, %ymm3, %ymm4
vpaddd %ymm10, %ymm11, %ymm6
vpaddd %ymm14, %ymm15, %ymm7
vpslld $13, %ymm4, %ymm5
vpsrld $19, %ymm4, %ymm4
vpshufd $0x93, %ymm3, %ymm3
vpshufd $0x93, %ymm11, %ymm11
vpshufd $0x93, %ymm15, %ymm15
vpxor %ymm5, %ymm1, %ymm1
vpxor %ymm4, %ymm1, %ymm1
vpslld $13, %ymm6, %ymm5
vpsrld $19, %ymm6, %ymm6
vpxor %ymm5, %ymm9, %ymm9
vpxor %ymm6, %ymm9, %ymm9
vpslld $13, %ymm7, %ymm5
vpsrld $19, %ymm7, %ymm7
vpxor %ymm5, %ymm13, %ymm13
vpxor %ymm7, %ymm13, %ymm13
/* Half 1, step 4: ymm0/ymm8/ymm12 ^= rotl32(ymm2+ymm1 / ymm10+ymm9 / ymm14+ymm13, 18);
   ymm2/ymm10/ymm14 are shuffled with 0x4e after the sums are taken. */
vpaddd %ymm1, %ymm2, %ymm4
vpaddd %ymm9, %ymm10, %ymm6
vpaddd %ymm13, %ymm14, %ymm7
vpslld $18, %ymm4, %ymm5
vpsrld $14, %ymm4, %ymm4
vpshufd $0x4e, %ymm2, %ymm2
vpshufd $0x4e, %ymm10, %ymm10
vpshufd $0x4e, %ymm14, %ymm14
vpxor %ymm5, %ymm0, %ymm0
vpxor %ymm4, %ymm0, %ymm0
vpslld $18, %ymm6, %ymm5
vpsrld $14, %ymm6, %ymm6
vpxor %ymm5, %ymm8, %ymm8
vpxor %ymm6, %ymm8, %ymm8
vpslld $18, %ymm7, %ymm5
vpsrld $14, %ymm7, %ymm7
vpxor %ymm5, %ymm12, %ymm12
vpxor %ymm7, %ymm12, %ymm12
/* Half 2, step 1: roles of registers 1 and 3 swapped. ymm1/ymm9/ymm13 are
   first shuffled with 0x39, then ^= rotl32(ymm3+ymm0 / ymm11+ymm8 / ymm15+ymm12, 7). */
vpaddd %ymm0, %ymm3, %ymm4
vpaddd %ymm8, %ymm11, %ymm6
vpaddd %ymm12, %ymm15, %ymm7
vpslld $7, %ymm4, %ymm5
vpsrld $25, %ymm4, %ymm4
vpshufd $0x39, %ymm1, %ymm1
vpxor %ymm5, %ymm1, %ymm1
vpxor %ymm4, %ymm1, %ymm1
vpslld $7, %ymm6, %ymm5
vpsrld $25, %ymm6, %ymm6
vpshufd $0x39, %ymm9, %ymm9
vpxor %ymm5, %ymm9, %ymm9
vpxor %ymm6, %ymm9, %ymm9
vpslld $7, %ymm7, %ymm5
vpsrld $25, %ymm7, %ymm7
vpshufd $0x39, %ymm13, %ymm13
vpxor %ymm5, %ymm13, %ymm13
vpxor %ymm7, %ymm13, %ymm13
/* Half 2, step 2: ymm2/ymm10/ymm14 ^= rotl32(ymm0+ymm1 / ymm8+ymm9 / ymm12+ymm13, 9). */
vpaddd %ymm1, %ymm0, %ymm4
vpaddd %ymm9, %ymm8, %ymm6
vpaddd %ymm13, %ymm12, %ymm7
vpslld $9, %ymm4, %ymm5
vpsrld $23, %ymm4, %ymm4
vpxor %ymm5, %ymm2, %ymm2
vpxor %ymm4, %ymm2, %ymm2
vpslld $9, %ymm6, %ymm5
vpsrld $23, %ymm6, %ymm6
vpxor %ymm5, %ymm10, %ymm10
vpxor %ymm6, %ymm10, %ymm10
vpslld $9, %ymm7, %ymm5
vpsrld $23, %ymm7, %ymm7
vpxor %ymm5, %ymm14, %ymm14
vpxor %ymm7, %ymm14, %ymm14
/* Half 2, step 3: ymm3/ymm11/ymm15 ^= rotl32(ymm1+ymm2 / ymm9+ymm10 / ymm13+ymm14, 13);
   ymm1/ymm9/ymm13 are shuffled with 0x93 after the sums are taken. */
vpaddd %ymm2, %ymm1, %ymm4
vpaddd %ymm10, %ymm9, %ymm6
vpaddd %ymm14, %ymm13, %ymm7
vpslld $13, %ymm4, %ymm5
vpsrld $19, %ymm4, %ymm4
vpshufd $0x93, %ymm1, %ymm1
vpshufd $0x93, %ymm9, %ymm9
vpshufd $0x93, %ymm13, %ymm13
vpxor %ymm5, %ymm3, %ymm3
vpxor %ymm4, %ymm3, %ymm3
vpslld $13, %ymm6, %ymm5
vpsrld $19, %ymm6, %ymm6
vpxor %ymm5, %ymm11, %ymm11
vpxor %ymm6, %ymm11, %ymm11
vpslld $13, %ymm7, %ymm5
vpsrld $19, %ymm7, %ymm7
vpxor %ymm5, %ymm15, %ymm15
vpxor %ymm7, %ymm15, %ymm15
/* Half 2, step 4: ymm0/ymm8/ymm12 ^= rotl32(ymm2+ymm3 / ymm10+ymm11 / ymm14+ymm15, 18);
   the 0x4e shuffles of registers 2 and the 0x39 shuffles of registers 3
   restore the entry layout. */
vpaddd %ymm3, %ymm2, %ymm4
vpaddd %ymm11, %ymm10, %ymm6
vpaddd %ymm15, %ymm14, %ymm7
vpslld $18, %ymm4, %ymm5
vpsrld $14, %ymm4, %ymm4
vpshufd $0x4e, %ymm2, %ymm2
vpshufd $0x4e, %ymm10, %ymm10
vpxor %ymm5, %ymm0, %ymm0
vpxor %ymm4, %ymm0, %ymm0
vpslld $18, %ymm6, %ymm5
vpsrld $14, %ymm6, %ymm6
vpshufd $0x4e, %ymm14, %ymm14
vpshufd $0x39, %ymm11, %ymm11
vpxor %ymm5, %ymm8, %ymm8
vpxor %ymm6, %ymm8, %ymm8
vpslld $18, %ymm7, %ymm5
vpsrld $14, %ymm7, %ymm7
vpshufd $0x39, %ymm3, %ymm3
vpshufd $0x39, %ymm15, %ymm15
vpxor %ymm5, %ymm12, %ymm12
vpxor %ymm7, %ymm12, %ymm12
.endm
/*
 * salsa8_core_6way_avx2
 *
 * Expands salsa8_core_6way_avx2_doubleround four times back to back
 * (four double rounds) on the states held in ymm0..ymm3, ymm8..ymm11
 * and ymm12..ymm15.
 */
.macro salsa8_core_6way_avx2
.rept 4
salsa8_core_6way_avx2_doubleround
.endr
.endm
/*
 * scrypt_core_6way / _scrypt_core_6way -- exported entry point of the
 * 6-way AVX2 scrypt core.
 *
 * This block is only the prologue. The macro definitions that follow emit
 * no code, so execution falls through into scrypt_core_6way_avx2.
 *
 * After the prologue:
 *   rdi = first argument, rsi = second argument, r8 = third argument
 *         (on _WIN64/__CYGWIN__ the first two arrive in rcx/rdx and are
 *         moved; elsewhere the third arrives in rdx and is moved to r8)
 *   rdx = stack pointer value to restore in scrypt_core_6way_cleanup
 *   rsp = 768-byte work area, aligned down to a 128-byte boundary
 * rbx and rbp are pushed here and popped again by the cleanup macro.
 */
.text
.p2align 6
.globl scrypt_core_6way
.globl _scrypt_core_6way
scrypt_core_6way:
_scrypt_core_6way:
pushq %rbx
pushq %rbp
#if defined(_WIN64) || defined(__CYGWIN__)
/* Save xmm6..xmm15 and rdi/rsi, then move the arguments into the registers
   the rest of the code expects. The +8 bias on the save slots presumably
   makes them 16-byte aligned for vmovdqa, given the return address and the
   two pushes above -- this assumes the usual 16-byte stack alignment at the
   call site. */
subq $176, %rsp
vmovdqa %xmm6, 8(%rsp)
vmovdqa %xmm7, 24(%rsp)
vmovdqa %xmm8, 40(%rsp)
vmovdqa %xmm9, 56(%rsp)
vmovdqa %xmm10, 72(%rsp)
vmovdqa %xmm11, 88(%rsp)
vmovdqa %xmm12, 104(%rsp)
vmovdqa %xmm13, 120(%rsp)
vmovdqa %xmm14, 136(%rsp)
vmovdqa %xmm15, 152(%rsp)
pushq %rdi
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
#else
movq %rdx, %r8
#endif
/* Remember the current stack pointer in rdx, then carve out the aligned
   768-byte work area. */
movq %rsp, %rdx
subq $768, %rsp
andq $-128, %rsp
/*
 * scrypt_core_6way_cleanup
 *
 * Epilogue matching the scrypt_core_6way prologue: restores rsp from rdx
 * (so rdx must still hold the value saved by the prologue), then undoes the
 * prologue in reverse order. On _WIN64/__CYGWIN__ that means popping rsi and
 * rdi, reloading xmm6..xmm15 from their save slots and releasing the
 * 176-byte save area. Finally pops rbp and rbx. The macro does not emit the
 * ret itself.
 */
.macro scrypt_core_6way_cleanup
movq %rdx, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rsi
popq %rdi
vmovdqa 8(%rsp), %xmm6
vmovdqa 24(%rsp), %xmm7
vmovdqa 40(%rsp), %xmm8
vmovdqa 56(%rsp), %xmm9
vmovdqa 72(%rsp), %xmm10
vmovdqa 88(%rsp), %xmm11
vmovdqa 104(%rsp), %xmm12
vmovdqa 120(%rsp), %xmm13
vmovdqa 136(%rsp), %xmm14
vmovdqa 152(%rsp), %xmm15
addq $176, %rsp
#endif
popq %rbp
popq %rbx
.endm
/*
 * scrypt_shuffle_pack2 src, so, dest, do
 *
 * Combines two 64-byte blocks into four 32-byte rows and permutes them:
 *   - the four 16-byte rows at \so(\src) become the low halves of
 *     ymm0..ymm3, the four rows at \so+128(\src) the high halves;
 *   - the vpblendd sequence then moves dwords between the four rows. A
 *     blend never changes a dword's position within its 128-bit lane, only
 *     which row it sits in: output row k takes dword c from input row
 *     (c - k) mod 4;
 *   - the four resulting rows are stored at \do(\dest), 32 bytes apart.
 *
 * Clobbers ymm0..ymm7. The vmovdqa accesses need 16-byte aligned source
 * rows at \so(\src) and 32-byte aligned destination rows.
 */
.macro scrypt_shuffle_pack2 src, so, dest, do
vmovdqa \so+0*16(\src), %xmm0
vmovdqa \so+1*16(\src), %xmm1
vmovdqa \so+2*16(\src), %xmm2
vmovdqa \so+3*16(\src), %xmm3
vinserti128 $1, \so+128+0*16(\src), %ymm0, %ymm0
vinserti128 $1, \so+128+1*16(\src), %ymm1, %ymm1
vinserti128 $1, \so+128+2*16(\src), %ymm2, %ymm2
vinserti128 $1, \so+128+3*16(\src), %ymm3, %ymm3
/* First blend level: exchange dword columns 2,3 between rows 0/2 and
   columns 0,1 between rows 1/3. */
vpblendd $0x33, %ymm0, %ymm2, %ymm4
vpblendd $0xcc, %ymm1, %ymm3, %ymm5
vpblendd $0x33, %ymm2, %ymm0, %ymm6
vpblendd $0xcc, %ymm3, %ymm1, %ymm7
/* Second blend level: merge even columns of one intermediate with odd
   columns of another to form the final rows. */
vpblendd $0x55, %ymm7, %ymm6, %ymm3
vpblendd $0x55, %ymm6, %ymm5, %ymm2
vpblendd $0x55, %ymm5, %ymm4, %ymm1
vpblendd $0x55, %ymm4, %ymm7, %ymm0
vmovdqa %ymm0, \do+0*32(\dest)
vmovdqa %ymm1, \do+1*32(\dest)
vmovdqa %ymm2, \do+2*32(\dest)
vmovdqa %ymm3, \do+3*32(\dest)
.endm
/*
 * scrypt_shuffle_unpack2 src, so, dest, do
 *
 * Counterpart of scrypt_shuffle_pack2: loads four 32-byte rows from
 * \so(\src), applies the same vpblendd sequence (output row k takes dword c
 * from input row (c - k) mod 4, which is its own inverse), then writes the
 * low 128 bits of the four rows to \do(\dest) and the high 128 bits to
 * \do+128(\dest), 16 bytes apart.
 *
 * Clobbers ymm0..ymm7. The vmovdqa accesses need 32-byte aligned source
 * rows and 16-byte aligned destination rows at \do(\dest).
 */
.macro scrypt_shuffle_unpack2 src, so, dest, do
vmovdqa \so+0*32(\src), %ymm0
vmovdqa \so+1*32(\src), %ymm1
vmovdqa \so+2*32(\src), %ymm2
vmovdqa \so+3*32(\src), %ymm3
/* Same two-level blend network as in scrypt_shuffle_pack2. */
vpblendd $0x33, %ymm0, %ymm2, %ymm4
vpblendd $0xcc, %ymm1, %ymm3, %ymm5
vpblendd $0x33, %ymm2, %ymm0, %ymm6
vpblendd $0xcc, %ymm3, %ymm1, %ymm7
vpblendd $0x55, %ymm7, %ymm6, %ymm3
vpblendd $0x55, %ymm6, %ymm5, %ymm2
vpblendd $0x55, %ymm5, %ymm4, %ymm1
vpblendd $0x55, %ymm4, %ymm7, %ymm0
/* Low halves go to the first block, high halves to the block 128 bytes on. */
vmovdqa %xmm0, \do+0*16(\dest)
vmovdqa %xmm1, \do+1*16(\dest)
vmovdqa %xmm2, \do+2*16(\dest)
vmovdqa %xmm3, \do+3*16(\dest)
vextracti128 $1, %ymm0, \do+128+0*16(\dest)
vextracti128 $1, %ymm1, \do+128+1*16(\dest)
vextracti128 $1, %ymm2, \do+128+2*16(\dest)
vextracti128 $1, %ymm3, \do+128+3*16(\dest)
.endm
/*
 * scrypt_core_6way_avx2 -- AVX2 body of the 6-way scrypt core.
 *
 * Registers and memory as used below:
 *   rdi  six 128-byte blocks (lane L at rdi + 128*L); read on entry and
 *        overwritten with the result before returning.
 *   rsi  scratchpad V, 768 bytes per entry, accessed with aligned vector
 *        moves (32-byte aligned).
 *   r8   iteration count N. N-1 is used as an index mask in the second
 *        loop, so N is presumably a power of two -- confirm against caller.
 *   rsp  768-byte aligned work area (reserved by the scrypt_core_6way
 *        prologue); rdx holds the stack pointer to restore.
 *
 * Lanes are processed in pairs: pair p (lanes 2p and 2p+1) occupies
 * 256 bytes at offset 256*p of both the work area and each scratchpad
 * entry. Within a pair there are eight 32-byte rows; the low 16 bytes of a
 * row belong to lane 2p and the high 16 bytes to lane 2p+1. Rows 0..3 hold
 * the first 64-byte half of the blocks and rows 4..7 the second half.
 * Between steps the second half is kept live in ymm0..3 (pair 0),
 * ymm8..11 (pair 1) and ymm12..15 (pair 2).
 *
 * Scratchpad entries store (first half XOR second half, second half);
 * loop 2 relies on that to fold two XORs into one.
 *
 * Clobbers rax, rbx, rcx, rbp, r8..r11, all ymm registers and the flags.
 *
 * Changes from the previous revision:
 *   - loop 2 scaled entry indices with 32-bit shifts (shll $8), which
 *     truncated the byte offset once (3*j + p)*256 reached 2^32
 *     (N >= 2^23), while loop 1 already used a 64-bit shift for its end
 *     pointer. The scaling is now 64-bit; results for smaller N are
 *     unchanged.
 *   - vzeroupper is issued before returning so the caller does not pay
 *     AVX/SSE transition penalties when it runs legacy-SSE code next.
 */
scrypt_core_6way_avx2:
/* Interleave the six input blocks pairwise into the work area, permuting
   the dwords of each 64-byte half. */
scrypt_shuffle_pack2 %rdi, 0*256+0, %rsp, 0*128
scrypt_shuffle_pack2 %rdi, 0*256+64, %rsp, 1*128
scrypt_shuffle_pack2 %rdi, 1*256+0, %rsp, 2*128
scrypt_shuffle_pack2 %rdi, 1*256+64, %rsp, 3*128
scrypt_shuffle_pack2 %rdi, 2*256+0, %rsp, 4*128
scrypt_shuffle_pack2 %rdi, 2*256+64, %rsp, 5*128
/* Load the second half (rows 4..7) of every pair into registers. */
vmovdqa 0*256+4*32(%rsp), %ymm0
vmovdqa 0*256+5*32(%rsp), %ymm1
vmovdqa 0*256+6*32(%rsp), %ymm2
vmovdqa 0*256+7*32(%rsp), %ymm3
vmovdqa 1*256+4*32(%rsp), %ymm8
vmovdqa 1*256+5*32(%rsp), %ymm9
vmovdqa 1*256+6*32(%rsp), %ymm10
vmovdqa 1*256+7*32(%rsp), %ymm11
vmovdqa 2*256+4*32(%rsp), %ymm12
vmovdqa 2*256+5*32(%rsp), %ymm13
vmovdqa 2*256+6*32(%rsp), %ymm14
vmovdqa 2*256+7*32(%rsp), %ymm15
/* rbx = write cursor in V, rax = rsi + N*3*256 = end of V. */
movq %rsi, %rbx
leaq (%r8, %r8, 2), %rax
shlq $8, %rax
addq %rsi, %rax
/* Loop 1: write one scratchpad entry per iteration, then advance the
   blocks by two salsa8 applications. */
scrypt_core_6way_avx2_loop1:
/* Per pair: store the second half into the entry, then XOR in the first
   half from the work area. */
vmovdqa %ymm0, 0*256+4*32(%rbx)
vmovdqa %ymm1, 0*256+5*32(%rbx)
vmovdqa %ymm2, 0*256+6*32(%rbx)
vmovdqa %ymm3, 0*256+7*32(%rbx)
vpxor 0*256+0*32(%rsp), %ymm0, %ymm0
vpxor 0*256+1*32(%rsp), %ymm1, %ymm1
vpxor 0*256+2*32(%rsp), %ymm2, %ymm2
vpxor 0*256+3*32(%rsp), %ymm3, %ymm3
vmovdqa %ymm8, 1*256+4*32(%rbx)
vmovdqa %ymm9, 1*256+5*32(%rbx)
vmovdqa %ymm10, 1*256+6*32(%rbx)
vmovdqa %ymm11, 1*256+7*32(%rbx)
vpxor 1*256+0*32(%rsp), %ymm8, %ymm8
vpxor 1*256+1*32(%rsp), %ymm9, %ymm9
vpxor 1*256+2*32(%rsp), %ymm10, %ymm10
vpxor 1*256+3*32(%rsp), %ymm11, %ymm11
vmovdqa %ymm12, 2*256+4*32(%rbx)
vmovdqa %ymm13, 2*256+5*32(%rbx)
vmovdqa %ymm14, 2*256+6*32(%rbx)
vmovdqa %ymm15, 2*256+7*32(%rbx)
vpxor 2*256+0*32(%rsp), %ymm12, %ymm12
vpxor 2*256+1*32(%rsp), %ymm13, %ymm13
vpxor 2*256+2*32(%rsp), %ymm14, %ymm14
vpxor 2*256+3*32(%rsp), %ymm15, %ymm15
/* Store (first XOR second) as the first half of the entry; it is also
   the salsa8 input that is added back after the rounds. */
vmovdqa %ymm0, 0*256+0*32(%rbx)
vmovdqa %ymm1, 0*256+1*32(%rbx)
vmovdqa %ymm2, 0*256+2*32(%rbx)
vmovdqa %ymm3, 0*256+3*32(%rbx)
vmovdqa %ymm8, 1*256+0*32(%rbx)
vmovdqa %ymm9, 1*256+1*32(%rbx)
vmovdqa %ymm10, 1*256+2*32(%rbx)
vmovdqa %ymm11, 1*256+3*32(%rbx)
vmovdqa %ymm12, 2*256+0*32(%rbx)
vmovdqa %ymm13, 2*256+1*32(%rbx)
vmovdqa %ymm14, 2*256+2*32(%rbx)
vmovdqa %ymm15, 2*256+3*32(%rbx)
salsa8_core_6way_avx2
/* Add the saved input back: registers now hold the new first half. */
vpaddd 0*256+0*32(%rbx), %ymm0, %ymm0
vpaddd 0*256+1*32(%rbx), %ymm1, %ymm1
vpaddd 0*256+2*32(%rbx), %ymm2, %ymm2
vpaddd 0*256+3*32(%rbx), %ymm3, %ymm3
vpaddd 1*256+0*32(%rbx), %ymm8, %ymm8
vpaddd 1*256+1*32(%rbx), %ymm9, %ymm9
vpaddd 1*256+2*32(%rbx), %ymm10, %ymm10
vpaddd 1*256+3*32(%rbx), %ymm11, %ymm11
vpaddd 2*256+0*32(%rbx), %ymm12, %ymm12
vpaddd 2*256+1*32(%rbx), %ymm13, %ymm13
vpaddd 2*256+2*32(%rbx), %ymm14, %ymm14
vpaddd 2*256+3*32(%rbx), %ymm15, %ymm15
vmovdqa %ymm0, 0*256+0*32(%rsp)
vmovdqa %ymm1, 0*256+1*32(%rsp)
vmovdqa %ymm2, 0*256+2*32(%rsp)
vmovdqa %ymm3, 0*256+3*32(%rsp)
vmovdqa %ymm8, 1*256+0*32(%rsp)
vmovdqa %ymm9, 1*256+1*32(%rsp)
vmovdqa %ymm10, 1*256+2*32(%rsp)
vmovdqa %ymm11, 1*256+3*32(%rsp)
vmovdqa %ymm12, 2*256+0*32(%rsp)
vmovdqa %ymm13, 2*256+1*32(%rsp)
vmovdqa %ymm14, 2*256+2*32(%rsp)
vmovdqa %ymm15, 2*256+3*32(%rsp)
/* XOR in the old second half (just stored in the entry) and park the
   result in the work area as the input of the second salsa8. */
vpxor 0*256+4*32(%rbx), %ymm0, %ymm0
vpxor 0*256+5*32(%rbx), %ymm1, %ymm1
vpxor 0*256+6*32(%rbx), %ymm2, %ymm2
vpxor 0*256+7*32(%rbx), %ymm3, %ymm3
vpxor 1*256+4*32(%rbx), %ymm8, %ymm8
vpxor 1*256+5*32(%rbx), %ymm9, %ymm9
vpxor 1*256+6*32(%rbx), %ymm10, %ymm10
vpxor 1*256+7*32(%rbx), %ymm11, %ymm11
vpxor 2*256+4*32(%rbx), %ymm12, %ymm12
vpxor 2*256+5*32(%rbx), %ymm13, %ymm13
vpxor 2*256+6*32(%rbx), %ymm14, %ymm14
vpxor 2*256+7*32(%rbx), %ymm15, %ymm15
vmovdqa %ymm0, 0*256+4*32(%rsp)
vmovdqa %ymm1, 0*256+5*32(%rsp)
vmovdqa %ymm2, 0*256+6*32(%rsp)
vmovdqa %ymm3, 0*256+7*32(%rsp)
vmovdqa %ymm8, 1*256+4*32(%rsp)
vmovdqa %ymm9, 1*256+5*32(%rsp)
vmovdqa %ymm10, 1*256+6*32(%rsp)
vmovdqa %ymm11, 1*256+7*32(%rsp)
vmovdqa %ymm12, 2*256+4*32(%rsp)
vmovdqa %ymm13, 2*256+5*32(%rsp)
vmovdqa %ymm14, 2*256+6*32(%rsp)
vmovdqa %ymm15, 2*256+7*32(%rsp)
salsa8_core_6way_avx2
/* Registers now hold the new second half (kept live across iterations). */
vpaddd 0*256+4*32(%rsp), %ymm0, %ymm0
vpaddd 0*256+5*32(%rsp), %ymm1, %ymm1
vpaddd 0*256+6*32(%rsp), %ymm2, %ymm2
vpaddd 0*256+7*32(%rsp), %ymm3, %ymm3
vpaddd 1*256+4*32(%rsp), %ymm8, %ymm8
vpaddd 1*256+5*32(%rsp), %ymm9, %ymm9
vpaddd 1*256+6*32(%rsp), %ymm10, %ymm10
vpaddd 1*256+7*32(%rsp), %ymm11, %ymm11
vpaddd 2*256+4*32(%rsp), %ymm12, %ymm12
vpaddd 2*256+5*32(%rsp), %ymm13, %ymm13
vpaddd 2*256+6*32(%rsp), %ymm14, %ymm14
vpaddd 2*256+7*32(%rsp), %ymm15, %ymm15
/* Next entry (six lanes of 128 bytes); stop at the end of V. */
addq $6*128, %rbx
cmpq %rax, %rbx
jne scrypt_core_6way_avx2_loop1
/* Spill the second half so the work area is complete before loop 2. */
vmovdqa %ymm0, 0*256+4*32(%rsp)
vmovdqa %ymm1, 0*256+5*32(%rsp)
vmovdqa %ymm2, 0*256+6*32(%rsp)
vmovdqa %ymm3, 0*256+7*32(%rsp)
vmovdqa %ymm8, 1*256+4*32(%rsp)
vmovdqa %ymm9, 1*256+5*32(%rsp)
vmovdqa %ymm10, 1*256+6*32(%rsp)
vmovdqa %ymm11, 1*256+7*32(%rsp)
vmovdqa %ymm12, 2*256+4*32(%rsp)
vmovdqa %ymm13, 2*256+5*32(%rsp)
vmovdqa %ymm14, 2*256+6*32(%rsp)
vmovdqa %ymm15, 2*256+7*32(%rsp)
/* rcx = remaining iterations of loop 2, r11 = N-1 (index mask).
   r8 itself is reused as an index register inside the loop. */
movq %r8, %rcx
leaq -1(%r8), %r11
/* Loop 2: N data-dependent scratchpad reads, one entry per lane. */
scrypt_core_6way_avx2_loop2:
/* The low dword of each 128-bit half of ymm0/ymm8/ymm12 selects the entry
   for the corresponding lane: ebp/ebx/eax for the low lanes of pairs
   0/1/2, r8d/r9d/r10d for the high lanes. vmovd zero-extends. */
vmovd %xmm0, %ebp
vmovd %xmm8, %ebx
vmovd %xmm12, %eax
vextracti128 $1, %ymm0, %xmm4
vextracti128 $1, %ymm8, %xmm5
vextracti128 $1, %ymm12, %xmm6
vmovd %xmm4, %r8d
vmovd %xmm5, %r9d
vmovd %xmm6, %r10d
vpxor 0*256+0*32(%rsp), %ymm0, %ymm0
vpxor 0*256+1*32(%rsp), %ymm1, %ymm1
vpxor 0*256+2*32(%rsp), %ymm2, %ymm2
vpxor 0*256+3*32(%rsp), %ymm3, %ymm3
vpxor 1*256+0*32(%rsp), %ymm8, %ymm8
vpxor 1*256+1*32(%rsp), %ymm9, %ymm9
vpxor 1*256+2*32(%rsp), %ymm10, %ymm10
vpxor 1*256+3*32(%rsp), %ymm11, %ymm11
vpxor 2*256+0*32(%rsp), %ymm12, %ymm12
vpxor 2*256+1*32(%rsp), %ymm13, %ymm13
vpxor 2*256+2*32(%rsp), %ymm14, %ymm14
vpxor 2*256+3*32(%rsp), %ymm15, %ymm15
/* Byte offset of pair p inside entry j: ((j & (N-1))*3 + p) * 256.
   The andl leaves the upper 32 bits zero, and the multiply-by-3 and the
   shift are done in 64 bits so large scratchpads are addressed correctly. */
andl %r11d, %ebp
leaq 0(%rbp, %rbp, 2), %rbp
shlq $8, %rbp
andl %r11d, %ebx
leaq 1(%rbx, %rbx, 2), %rbx
shlq $8, %rbx
andl %r11d, %eax
leaq 2(%rax, %rax, 2), %rax
shlq $8, %rax
andl %r11d, %r8d
leaq 0(%r8, %r8, 2), %r8
shlq $8, %r8
andl %r11d, %r9d
leaq 1(%r9, %r9, 2), %r9
shlq $8, %r9
andl %r11d, %r10d
leaq 2(%r10, %r10, 2), %r10
shlq $8, %r10
/* Gather the first half of the selected entries: the low 16 bytes of each
   row come from the low lane's entry, the high 16 bytes (at +16) from the
   high lane's entry. XOR them into the running state. */
vmovdqa 0*32(%rsi, %rbp), %xmm4
vinserti128 $1, 0*32+16(%rsi, %r8), %ymm4, %ymm4
vmovdqa 1*32(%rsi, %rbp), %xmm5
vinserti128 $1, 1*32+16(%rsi, %r8), %ymm5, %ymm5
vmovdqa 2*32(%rsi, %rbp), %xmm6
vinserti128 $1, 2*32+16(%rsi, %r8), %ymm6, %ymm6
vmovdqa 3*32(%rsi, %rbp), %xmm7
vinserti128 $1, 3*32+16(%rsi, %r8), %ymm7, %ymm7
vpxor %ymm4, %ymm0, %ymm0
vpxor %ymm5, %ymm1, %ymm1
vpxor %ymm6, %ymm2, %ymm2
vpxor %ymm7, %ymm3, %ymm3
vmovdqa 0*32(%rsi, %rbx), %xmm4
vinserti128 $1, 0*32+16(%rsi, %r9), %ymm4, %ymm4
vmovdqa 1*32(%rsi, %rbx), %xmm5
vinserti128 $1, 1*32+16(%rsi, %r9), %ymm5, %ymm5
vmovdqa 2*32(%rsi, %rbx), %xmm6
vinserti128 $1, 2*32+16(%rsi, %r9), %ymm6, %ymm6
vmovdqa 3*32(%rsi, %rbx), %xmm7
vinserti128 $1, 3*32+16(%rsi, %r9), %ymm7, %ymm7
vpxor %ymm4, %ymm8, %ymm8
vpxor %ymm5, %ymm9, %ymm9
vpxor %ymm6, %ymm10, %ymm10
vpxor %ymm7, %ymm11, %ymm11
vmovdqa 0*32(%rsi, %rax), %xmm4
vinserti128 $1, 0*32+16(%rsi, %r10), %ymm4, %ymm4
vmovdqa 1*32(%rsi, %rax), %xmm5
vinserti128 $1, 1*32+16(%rsi, %r10), %ymm5, %ymm5
vmovdqa 2*32(%rsi, %rax), %xmm6
vinserti128 $1, 2*32+16(%rsi, %r10), %ymm6, %ymm6
vmovdqa 3*32(%rsi, %rax), %xmm7
vinserti128 $1, 3*32+16(%rsi, %r10), %ymm7, %ymm7
vpxor %ymm4, %ymm12, %ymm12
vpxor %ymm5, %ymm13, %ymm13
vpxor %ymm6, %ymm14, %ymm14
vpxor %ymm7, %ymm15, %ymm15
/* Save the salsa8 input so it can be added back afterwards. */
vmovdqa %ymm0, 0*256+0*32(%rsp)
vmovdqa %ymm1, 0*256+1*32(%rsp)
vmovdqa %ymm2, 0*256+2*32(%rsp)
vmovdqa %ymm3, 0*256+3*32(%rsp)
vmovdqa %ymm8, 1*256+0*32(%rsp)
vmovdqa %ymm9, 1*256+1*32(%rsp)
vmovdqa %ymm10, 1*256+2*32(%rsp)
vmovdqa %ymm11, 1*256+3*32(%rsp)
vmovdqa %ymm12, 2*256+0*32(%rsp)
vmovdqa %ymm13, 2*256+1*32(%rsp)
vmovdqa %ymm14, 2*256+2*32(%rsp)
vmovdqa %ymm15, 2*256+3*32(%rsp)
salsa8_core_6way_avx2
/* New first half = salsa8 output + input; write it to the work area. */
vpaddd 0*256+0*32(%rsp), %ymm0, %ymm0
vpaddd 0*256+1*32(%rsp), %ymm1, %ymm1
vpaddd 0*256+2*32(%rsp), %ymm2, %ymm2
vpaddd 0*256+3*32(%rsp), %ymm3, %ymm3
vpaddd 1*256+0*32(%rsp), %ymm8, %ymm8
vpaddd 1*256+1*32(%rsp), %ymm9, %ymm9
vpaddd 1*256+2*32(%rsp), %ymm10, %ymm10
vpaddd 1*256+3*32(%rsp), %ymm11, %ymm11
vpaddd 2*256+0*32(%rsp), %ymm12, %ymm12
vpaddd 2*256+1*32(%rsp), %ymm13, %ymm13
vpaddd 2*256+2*32(%rsp), %ymm14, %ymm14
vpaddd 2*256+3*32(%rsp), %ymm15, %ymm15
vmovdqa %ymm0, 0*256+0*32(%rsp)
vmovdqa %ymm1, 0*256+1*32(%rsp)
vmovdqa %ymm2, 0*256+2*32(%rsp)
vmovdqa %ymm3, 0*256+3*32(%rsp)
vmovdqa %ymm8, 1*256+0*32(%rsp)
vmovdqa %ymm9, 1*256+1*32(%rsp)
vmovdqa %ymm10, 1*256+2*32(%rsp)
vmovdqa %ymm11, 1*256+3*32(%rsp)
vmovdqa %ymm12, 2*256+0*32(%rsp)
vmovdqa %ymm13, 2*256+1*32(%rsp)
vmovdqa %ymm14, 2*256+2*32(%rsp)
vmovdqa %ymm15, 2*256+3*32(%rsp)
/* Gather the second half (rows 4..7) of the same entries and XOR it in,
   followed by the current second half from the work area; the result is
   the input of the second salsa8. */
vmovdqa 4*32(%rsi, %rbp), %xmm4
vinserti128 $1, 4*32+16(%rsi, %r8), %ymm4, %ymm4
vmovdqa 5*32(%rsi, %rbp), %xmm5
vinserti128 $1, 5*32+16(%rsi, %r8), %ymm5, %ymm5
vmovdqa 6*32(%rsi, %rbp), %xmm6
vinserti128 $1, 6*32+16(%rsi, %r8), %ymm6, %ymm6
vmovdqa 7*32(%rsi, %rbp), %xmm7
vinserti128 $1, 7*32+16(%rsi, %r8), %ymm7, %ymm7
vpxor %ymm4, %ymm0, %ymm0
vpxor %ymm5, %ymm1, %ymm1
vpxor %ymm6, %ymm2, %ymm2
vpxor %ymm7, %ymm3, %ymm3
vmovdqa 4*32(%rsi, %rbx), %xmm4
vinserti128 $1, 4*32+16(%rsi, %r9), %ymm4, %ymm4
vmovdqa 5*32(%rsi, %rbx), %xmm5
vinserti128 $1, 5*32+16(%rsi, %r9), %ymm5, %ymm5
vmovdqa 6*32(%rsi, %rbx), %xmm6
vinserti128 $1, 6*32+16(%rsi, %r9), %ymm6, %ymm6
vmovdqa 7*32(%rsi, %rbx), %xmm7
vinserti128 $1, 7*32+16(%rsi, %r9), %ymm7, %ymm7
vpxor %ymm4, %ymm8, %ymm8
vpxor %ymm5, %ymm9, %ymm9
vpxor %ymm6, %ymm10, %ymm10
vpxor %ymm7, %ymm11, %ymm11
vmovdqa 4*32(%rsi, %rax), %xmm4
vinserti128 $1, 4*32+16(%rsi, %r10), %ymm4, %ymm4
vmovdqa 5*32(%rsi, %rax), %xmm5
vinserti128 $1, 5*32+16(%rsi, %r10), %ymm5, %ymm5
vmovdqa 6*32(%rsi, %rax), %xmm6
vinserti128 $1, 6*32+16(%rsi, %r10), %ymm6, %ymm6
vmovdqa 7*32(%rsi, %rax), %xmm7
vinserti128 $1, 7*32+16(%rsi, %r10), %ymm7, %ymm7
vpxor %ymm4, %ymm12, %ymm12
vpxor %ymm5, %ymm13, %ymm13
vpxor %ymm6, %ymm14, %ymm14
vpxor %ymm7, %ymm15, %ymm15
vpxor 0*256+4*32(%rsp), %ymm0, %ymm0
vpxor 0*256+5*32(%rsp), %ymm1, %ymm1
vpxor 0*256+6*32(%rsp), %ymm2, %ymm2
vpxor 0*256+7*32(%rsp), %ymm3, %ymm3
vpxor 1*256+4*32(%rsp), %ymm8, %ymm8
vpxor 1*256+5*32(%rsp), %ymm9, %ymm9
vpxor 1*256+6*32(%rsp), %ymm10, %ymm10
vpxor 1*256+7*32(%rsp), %ymm11, %ymm11
vpxor 2*256+4*32(%rsp), %ymm12, %ymm12
vpxor 2*256+5*32(%rsp), %ymm13, %ymm13
vpxor 2*256+6*32(%rsp), %ymm14, %ymm14
vpxor 2*256+7*32(%rsp), %ymm15, %ymm15
vmovdqa %ymm0, 0*256+4*32(%rsp)
vmovdqa %ymm1, 0*256+5*32(%rsp)
vmovdqa %ymm2, 0*256+6*32(%rsp)
vmovdqa %ymm3, 0*256+7*32(%rsp)
vmovdqa %ymm8, 1*256+4*32(%rsp)
vmovdqa %ymm9, 1*256+5*32(%rsp)
vmovdqa %ymm10, 1*256+6*32(%rsp)
vmovdqa %ymm11, 1*256+7*32(%rsp)
vmovdqa %ymm12, 2*256+4*32(%rsp)
vmovdqa %ymm13, 2*256+5*32(%rsp)
vmovdqa %ymm14, 2*256+6*32(%rsp)
vmovdqa %ymm15, 2*256+7*32(%rsp)
salsa8_core_6way_avx2
/* New second half: kept in registers for the next index extraction and
   also written to the work area. */
vpaddd 0*256+4*32(%rsp), %ymm0, %ymm0
vpaddd 0*256+5*32(%rsp), %ymm1, %ymm1
vpaddd 0*256+6*32(%rsp), %ymm2, %ymm2
vpaddd 0*256+7*32(%rsp), %ymm3, %ymm3
vpaddd 1*256+4*32(%rsp), %ymm8, %ymm8
vpaddd 1*256+5*32(%rsp), %ymm9, %ymm9
vpaddd 1*256+6*32(%rsp), %ymm10, %ymm10
vpaddd 1*256+7*32(%rsp), %ymm11, %ymm11
vpaddd 2*256+4*32(%rsp), %ymm12, %ymm12
vpaddd 2*256+5*32(%rsp), %ymm13, %ymm13
vpaddd 2*256+6*32(%rsp), %ymm14, %ymm14
vpaddd 2*256+7*32(%rsp), %ymm15, %ymm15
vmovdqa %ymm0, 0*256+4*32(%rsp)
vmovdqa %ymm1, 0*256+5*32(%rsp)
vmovdqa %ymm2, 0*256+6*32(%rsp)
vmovdqa %ymm3, 0*256+7*32(%rsp)
vmovdqa %ymm8, 1*256+4*32(%rsp)
vmovdqa %ymm9, 1*256+5*32(%rsp)
vmovdqa %ymm10, 1*256+6*32(%rsp)
vmovdqa %ymm11, 1*256+7*32(%rsp)
vmovdqa %ymm12, 2*256+4*32(%rsp)
vmovdqa %ymm13, 2*256+5*32(%rsp)
vmovdqa %ymm14, 2*256+6*32(%rsp)
vmovdqa %ymm15, 2*256+7*32(%rsp)
/* ja: loop while the decremented counter is non-zero (no borrow, not zero). */
subq $1, %rcx
ja scrypt_core_6way_avx2_loop2
/* De-interleave the work area back into the six blocks at rdi. */
scrypt_shuffle_unpack2 %rsp, 0*128, %rdi, 0*256+0
scrypt_shuffle_unpack2 %rsp, 1*128, %rdi, 0*256+64
scrypt_shuffle_unpack2 %rsp, 2*128, %rdi, 1*256+0
scrypt_shuffle_unpack2 %rsp, 3*128, %rdi, 1*256+64
scrypt_shuffle_unpack2 %rsp, 4*128, %rdi, 2*256+0
scrypt_shuffle_unpack2 %rsp, 5*128, %rdi, 2*256+64
scrypt_core_6way_cleanup
/* Clear the upper ymm halves before returning to code that may use
   legacy SSE encodings; the low 128 bits (including any xmm6..xmm15
   values restored by the cleanup macro) are left untouched. */
vzeroupper
ret
#endif /* USE_AVX2 */
#endif
07070100000026000081A4000003E800000064000000015EF4BCA10000435F000000000000000000000000000000000000001C00000000cpuminer-2.5.1/scrypt-x86.S/*
* Copyright 2011-2012, 2014 pooler@litecoinpool.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "cpuminer-config.h"
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#if defined(USE_ASM) && defined(__i386__)
/*
 * scrypt_shuffle src, so, dest, do
 *
 * Copies one 64-byte block (sixteen 32-bit words) from \so(\src) to
 * \do(\dest) while permuting the word order.  Word pairs 1<->5, 2<->10,
 * 3<->15, 4<->12, 7<->11 and 9<->13 are exchanged; words 0, 6, 8 and 14
 * keep their position.  Because the permutation only swaps pairs, applying
 * the macro twice restores the original order, which is why the same macro
 * is used both before and after the SSE2 core loop.
 *
 * Clobbers eax, ebx, ecx and edx.
 */
.macro scrypt_shuffle src, so, dest, do
/* words 15, 11, 7, 3 -> 3, 7, 11, 15 */
movl \so+60(\src), %eax
movl \so+44(\src), %ebx
movl \so+28(\src), %ecx
movl \so+12(\src), %edx
movl %eax, \do+12(\dest)
movl %ebx, \do+28(\dest)
movl %ecx, \do+44(\dest)
movl %edx, \do+60(\dest)
/* words 10, 2, 12, 4 -> 2, 10, 4, 12 */
movl \so+40(\src), %eax
movl \so+8(\src), %ebx
movl \so+48(\src), %ecx
movl \so+16(\src), %edx
movl %eax, \do+8(\dest)
movl %ebx, \do+40(\dest)
movl %ecx, \do+16(\dest)
movl %edx, \do+48(\dest)
/* words 5, 1, 13, 9 -> 1, 5, 9, 13 */
movl \so+20(\src), %eax
movl \so+4(\src), %ebx
movl \so+52(\src), %ecx
movl \so+36(\src), %edx
movl %eax, \do+4(\dest)
movl %ebx, \do+20(\dest)
movl %ecx, \do+36(\dest)
movl %edx, \do+52(\dest)
/* words 0, 6, 8, 14 stay in place */
movl \so+0(\src), %eax
movl \so+24(\src), %ebx
movl \so+32(\src), %ecx
movl \so+56(\src), %edx
movl %eax, \do+0(\dest)
movl %ebx, \do+24(\dest)
movl %ecx, \do+32(\dest)
movl %edx, \do+56(\dest)
.endm
/*
 * salsa8_core_gen_quadround
 *
 * Integer-only (no SSE) Salsa20 mixing step; per its name it performs four
 * rounds, and salsa8_core_gen expands it twice.  The sixteen 32-bit state
 * words live in the stack slots 4(%esp) .. 64(%esp) (the slot at 0(%esp) is
 * the return address of the enclosing salsa8_core_gen call) and are updated
 * in place.  Each step is the usual add / rotate-left (7, 9, 13, 18) / xor
 * sequence; additions are done with leal or addl.
 *
 * The loads, stores and arithmetic of neighbouring steps are interleaved and
 * words migrate between stack slots along the way, so the statement order
 * must not be changed.
 *
 * Clobbers ebx, ecx, edx, esi, edi and ebp.
 */
.macro salsa8_core_gen_quadround
movl 52(%esp), %ecx
movl 4(%esp), %edx
movl 20(%esp), %ebx
movl 8(%esp), %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 4(%esp)
movl 36(%esp), %edi
leal (%edx, %ebx), %ebp
roll $9, %ebp
xorl %ebp, %edi
movl 24(%esp), %ebp
movl %edi, 8(%esp)
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 40(%esp), %ebx
movl %ecx, 20(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 24(%esp)
movl 56(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 36(%esp)
movl 28(%esp), %ecx
movl %edx, 28(%esp)
movl 44(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 60(%esp), %ebx
movl %esi, 40(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 44(%esp)
movl 12(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 12(%esp)
movl 48(%esp), %esi
movl %ebp, 48(%esp)
movl 64(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl 32(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 32(%esp)
movl %ebx, %ecx
movl %edx, 52(%esp)
movl 28(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 40(%esp), %ebx
movl %esi, 28(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 40(%esp)
movl 12(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 12(%esp)
movl 4(%esp), %esi
movl %ebp, 4(%esp)
movl 48(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 48(%esp)
movl 32(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 32(%esp)
movl 24(%esp), %ecx
movl %edx, 24(%esp)
movl 52(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 28(%esp), %ebx
movl %esi, 28(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 52(%esp)
movl 8(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 8(%esp)
movl 44(%esp), %esi
movl %ebp, 44(%esp)
movl 4(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 20(%esp), %ebx
movl %ecx, 4(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl 36(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 20(%esp)
movl %ebx, %ecx
movl %edx, 36(%esp)
movl 24(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 28(%esp), %ebx
movl %esi, 24(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 28(%esp)
xorl %esi, %ebp
movl 8(%esp), %esi
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl 40(%esp), %edi
movl %ebp, 8(%esp)
movl 44(%esp), %ebp
movl %esi, 40(%esp)
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 4(%esp), %ebx
movl %ecx, 44(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 4(%esp)
movl 20(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 56(%esp)
movl 48(%esp), %ecx
movl %edx, 20(%esp)
movl 36(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 24(%esp), %ebx
movl %edi, 24(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 60(%esp)
movl 12(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 12(%esp)
movl 52(%esp), %edi
movl %ebp, 36(%esp)
movl 8(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl 32(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 32(%esp)
movl %ebx, %ecx
movl %edx, 48(%esp)
movl 20(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 24(%esp), %ebx
movl %edi, 20(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 8(%esp)
movl 12(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 12(%esp)
movl 28(%esp), %edi
movl %ebp, 52(%esp)
movl 36(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 28(%esp)
movl 32(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 32(%esp)
movl 4(%esp), %ecx
movl %edx, 4(%esp)
movl 48(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 20(%esp), %ebx
movl %edi, 20(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 48(%esp)
movl 40(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 36(%esp)
movl 60(%esp), %edi
movl %ebp, 24(%esp)
movl 52(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 44(%esp), %ebx
movl %ecx, 40(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 52(%esp)
movl 56(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 56(%esp)
addl %esi, %ebx
movl %edx, 44(%esp)
roll $13, %ebx
xorl %ebx, %edi
movl %edi, 60(%esp)
addl %esi, %edi
roll $18, %edi
xorl %edi, %ebp
movl %ebp, 64(%esp)
.endm
/*
 * salsa8_core_gen
 *
 * File-local subroutine (not exported with .globl) that expands
 * salsa8_core_gen_quadround twice over the 16-word state the caller placed
 * at the bottom of its stack frame.  Seen from inside this routine the state
 * sits at 4(%esp)..64(%esp), just above the return address.
 * It clobbers every register the quadround macro clobbers
 * (ebx, ecx, edx, esi, edi, ebp), so callers must reload what they need.
 */
.text
.p2align 5
salsa8_core_gen:
salsa8_core_gen_quadround
salsa8_core_gen_quadround
ret
/*
 * scrypt_core(X, V, N) -- i386 entry point (cdecl).
 *
 * C prototype as declared in scrypt.c:
 *     void scrypt_core(uint32_t *X, uint32_t *V, int N);
 *
 * After the four register pushes below the arguments are found at
 * 20(%esp) = X, 24(%esp) = V and 28(%esp) = N.  CPUID leaf 1 is queried
 * and, when EDX bit 26 (SSE2) is set, control transfers to
 * scrypt_core_sse2, which shares this prologue and pops the same four
 * registers on exit.  Otherwise the integer-only path scrypt_core_gen runs.
 */
.text
.p2align 5
.globl scrypt_core
.globl _scrypt_core
scrypt_core:
_scrypt_core:
pushl %ebx
pushl %ebp
pushl %edi
pushl %esi
/* Check for SSE2 availability */
movl $1, %eax
cpuid
andl $0x04000000, %edx
jnz scrypt_core_sse2
/*
 * Integer-only path.  edi = X, esi = V, ecx = N.
 * A 72-byte frame is reserved: 0..63(%esp) is the Salsa work area handed to
 * salsa8_core_gen, 64(%esp) and 68(%esp) are spill slots for loop state.
 * With the frame in place the arguments move to 92/96/100(%esp).
 */
scrypt_core_gen:
movl 20(%esp), %edi
movl 24(%esp), %esi
movl 28(%esp), %ecx
subl $72, %esp
/*
 * macro1a p, q (first loop): save words at byte offsets p and q of X into
 * the current V entry (esi), then X[p] ^= X[q] and copy the result into the
 * Salsa work area on the stack.
 */
.macro scrypt_core_macro1a p, q
movl \p(%edi), %eax
movl \q(%edi), %edx
movl %eax, \p(%esi)
movl %edx, \q(%esi)
xorl %edx, %eax
movl %eax, \p(%edi)
movl %eax, \p(%esp)
.endm
/*
 * macro1b p, q (second loop): xor the selected V entry (esi + edx) into
 * both X[p] and X[q], then X[p] ^= X[q] and copy the result into the Salsa
 * work area on the stack.
 */
.macro scrypt_core_macro1b p, q
movl \p(%edi), %eax
xorl \p(%esi, %edx), %eax
movl \q(%edi), %ebx
xorl \q(%esi, %edx), %ebx
movl %ebx, \q(%edi)
xorl %ebx, %eax
movl %eax, \p(%edi)
movl %eax, \p(%esp)
.endm
/*
 * macro2 p, q: finish the first Salsa call (X[p] += work[p]) and prepare
 * the second one (X[q] ^= X[p], also written to the work area).
 */
.macro scrypt_core_macro2 p, q
movl \p(%esp), %eax
addl \p(%edi), %eax
movl %eax, \p(%edi)
xorl \q(%edi), %eax
movl %eax, \q(%edi)
movl %eax, \p(%esp)
.endm
/* macro3 p, q: finish the second Salsa call (X[q] += work[p]). */
.macro scrypt_core_macro3 p, q
movl \p(%esp), %eax
addl \q(%edi), %eax
movl %eax, \q(%edi)
.endm
/* ecx = V + N * 128: end pointer for the fill loop */
shll $7, %ecx
addl %esi, %ecx
/* Loop 1: store X into successive 128-byte V entries while mixing X. */
scrypt_core_gen_loop1:
/* spill V cursor and end pointer; salsa8_core_gen clobbers esi and ecx */
movl %esi, 64(%esp)
movl %ecx, 68(%esp)
scrypt_core_macro1a 0, 64
scrypt_core_macro1a 4, 68
scrypt_core_macro1a 8, 72
scrypt_core_macro1a 12, 76
scrypt_core_macro1a 16, 80
scrypt_core_macro1a 20, 84
scrypt_core_macro1a 24, 88
scrypt_core_macro1a 28, 92
scrypt_core_macro1a 32, 96
scrypt_core_macro1a 36, 100
scrypt_core_macro1a 40, 104
scrypt_core_macro1a 44, 108
scrypt_core_macro1a 48, 112
scrypt_core_macro1a 52, 116
scrypt_core_macro1a 56, 120
scrypt_core_macro1a 60, 124
call salsa8_core_gen
/* reload X: edi was clobbered by the Salsa routine */
movl 92(%esp), %edi
scrypt_core_macro2 0, 64
scrypt_core_macro2 4, 68
scrypt_core_macro2 8, 72
scrypt_core_macro2 12, 76
scrypt_core_macro2 16, 80
scrypt_core_macro2 20, 84
scrypt_core_macro2 24, 88
scrypt_core_macro2 28, 92
scrypt_core_macro2 32, 96
scrypt_core_macro2 36, 100
scrypt_core_macro2 40, 104
scrypt_core_macro2 44, 108
scrypt_core_macro2 48, 112
scrypt_core_macro2 52, 116
scrypt_core_macro2 56, 120
scrypt_core_macro2 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
scrypt_core_macro3 0, 64
scrypt_core_macro3 4, 68
scrypt_core_macro3 8, 72
scrypt_core_macro3 12, 76
scrypt_core_macro3 16, 80
scrypt_core_macro3 20, 84
scrypt_core_macro3 24, 88
scrypt_core_macro3 28, 92
scrypt_core_macro3 32, 96
scrypt_core_macro3 36, 100
scrypt_core_macro3 40, 104
scrypt_core_macro3 44, 108
scrypt_core_macro3 48, 112
scrypt_core_macro3 52, 116
scrypt_core_macro3 56, 120
scrypt_core_macro3 60, 124
/* advance to the next V entry until the end pointer is reached */
movl 64(%esp), %esi
movl 68(%esp), %ecx
addl $128, %esi
cmpl %ecx, %esi
jne scrypt_core_gen_loop1
/*
 * Loop 2 setup: esi = V, ecx = N (iteration count).  The argument slot at
 * 100(%esp) is overwritten with N - 1 and used as the index mask.
 */
movl 96(%esp), %esi
movl 100(%esp), %ecx
movl %ecx, %eax
subl $1, %eax
movl %eax, 100(%esp)
/* Loop 2: N iterations of X ^= V[X[16] & (N - 1)] followed by mixing. */
scrypt_core_gen_loop2:
movl %ecx, 68(%esp)
/* edx = (X[16] & (N - 1)) * 128: byte offset of the selected V entry */
movl 64(%edi), %edx
andl 100(%esp), %edx
shll $7, %edx
scrypt_core_macro1b 0, 64
scrypt_core_macro1b 4, 68
scrypt_core_macro1b 8, 72
scrypt_core_macro1b 12, 76
scrypt_core_macro1b 16, 80
scrypt_core_macro1b 20, 84
scrypt_core_macro1b 24, 88
scrypt_core_macro1b 28, 92
scrypt_core_macro1b 32, 96
scrypt_core_macro1b 36, 100
scrypt_core_macro1b 40, 104
scrypt_core_macro1b 44, 108
scrypt_core_macro1b 48, 112
scrypt_core_macro1b 52, 116
scrypt_core_macro1b 56, 120
scrypt_core_macro1b 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
scrypt_core_macro2 0, 64
scrypt_core_macro2 4, 68
scrypt_core_macro2 8, 72
scrypt_core_macro2 12, 76
scrypt_core_macro2 16, 80
scrypt_core_macro2 20, 84
scrypt_core_macro2 24, 88
scrypt_core_macro2 28, 92
scrypt_core_macro2 32, 96
scrypt_core_macro2 36, 100
scrypt_core_macro2 40, 104
scrypt_core_macro2 44, 108
scrypt_core_macro2 48, 112
scrypt_core_macro2 52, 116
scrypt_core_macro2 56, 120
scrypt_core_macro2 60, 124
call salsa8_core_gen
/* reload X and V for the next iteration */
movl 92(%esp), %edi
movl 96(%esp), %esi
scrypt_core_macro3 0, 64
scrypt_core_macro3 4, 68
scrypt_core_macro3 8, 72
scrypt_core_macro3 12, 76
scrypt_core_macro3 16, 80
scrypt_core_macro3 20, 84
scrypt_core_macro3 24, 88
scrypt_core_macro3 28, 92
scrypt_core_macro3 32, 96
scrypt_core_macro3 36, 100
scrypt_core_macro3 40, 104
scrypt_core_macro3 44, 108
scrypt_core_macro3 48, 112
scrypt_core_macro3 52, 116
scrypt_core_macro3 56, 120
scrypt_core_macro3 60, 124
/* count down; ja falls through once the counter reaches zero */
movl 68(%esp), %ecx
subl $1, %ecx
ja scrypt_core_gen_loop2
/* release the frame and restore the registers pushed in the prologue */
addl $72, %esp
popl %esi
popl %edi
popl %ebp
popl %ebx
ret
/*
 * salsa8_core_sse2_doubleround
 *
 * SSE2 Salsa20 step on a state held in xmm0..xmm3 (per its name, one double
 * round; salsa8_core_sse2 expands it four times).  xmm4 and xmm5 are
 * scratch registers.  SSE2 has no packed rotate, so every rotation by
 * 7, 9, 13 or 18 bits is built from pslld + psrld and two pxor.  The pshufd
 * instructions rotate the lanes of xmm1..xmm3 between the two halves so
 * that the same add/rotate/xor pattern can be reused.
 *
 * Modifies xmm0..xmm5 only; no general-purpose register or memory is touched.
 */
.macro salsa8_core_sse2_doubleround
movdqa %xmm1, %xmm4
paddd %xmm0, %xmm4
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm0, %xmm4
pxor %xmm5, %xmm3
paddd %xmm3, %xmm4
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm3, %xmm4
pxor %xmm5, %xmm2
pshufd $0x93, %xmm3, %xmm3
paddd %xmm2, %xmm4
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm2, %xmm4
pxor %xmm5, %xmm1
pshufd $0x4e, %xmm2, %xmm2
paddd %xmm1, %xmm4
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
movdqa %xmm3, %xmm4
pxor %xmm5, %xmm0
pshufd $0x39, %xmm1, %xmm1
/* second half: same pattern with the roles of xmm1 and xmm3 exchanged */
paddd %xmm0, %xmm4
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm0, %xmm4
pxor %xmm5, %xmm1
paddd %xmm1, %xmm4
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm1, %xmm4
pxor %xmm5, %xmm2
pshufd $0x93, %xmm1, %xmm1
paddd %xmm2, %xmm4
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm2, %xmm4
pxor %xmm5, %xmm3
pshufd $0x4e, %xmm2, %xmm2
paddd %xmm3, %xmm4
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
pshufd $0x39, %xmm3, %xmm3
pxor %xmm5, %xmm0
.endm
/*
 * salsa8_core_sse2
 *
 * Expands salsa8_core_sse2_doubleround four times on the state in
 * xmm0..xmm3 (xmm4/xmm5 are scratch).  The feed-forward addition of the
 * input is left to the caller.
 */
.macro salsa8_core_sse2
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
.endm
/*
 * scrypt_core_sse2 -- SSE2 body of scrypt_core.
 *
 * Reached only by the jnz in scrypt_core, i.e. with ebx, ebp, edi and esi
 * already pushed, so the arguments are at 20(%esp) = X, 24(%esp) = V and
 * 28(%esp) = N on entry.
 *
 * Register use: edi = X, esi = V, ebp = caller's esp (the frame below is
 * re-aligned, so arguments are read through ebp afterwards).  A 128-byte,
 * 16-byte aligned frame holds a word-shuffled copy of X: bytes 0..63 are
 * the first half, bytes 64..95 part of the second half; the remaining 32
 * bytes of the second half stay cached in xmm6/xmm7 during the loops.
 *
 * V layout used by this path: every 128-byte entry holds
 * (first half XOR second half) in bytes 0..63 and the second half in bytes
 * 64..127, both in shuffled word order.  movdqa is used on V, so V must be
 * 16-byte aligned.
 */
.p2align 5
scrypt_core_sse2:
movl 20(%esp), %edi
movl 24(%esp), %esi
movl %esp, %ebp
subl $128, %esp
andl $-16, %esp
/* bring both 64-byte halves of X into the shuffled layout on the stack */
scrypt_shuffle %edi, 0, %esp, 0
scrypt_shuffle %edi, 64, %esp, 64
movdqa 96(%esp), %xmm6
movdqa 112(%esp), %xmm7
/* edx = V cursor, ecx = V + N * 128 (end pointer) */
movl %esi, %edx
movl 28(%ebp), %ecx
shll $7, %ecx
addl %esi, %ecx
/* Loop 1: fill V while mixing X. */
scrypt_core_sse2_loop1:
movdqa 0(%esp), %xmm0
movdqa 16(%esp), %xmm1
movdqa 32(%esp), %xmm2
movdqa 48(%esp), %xmm3
movdqa 64(%esp), %xmm4
movdqa 80(%esp), %xmm5
/* xmm0..3 = first half ^ second half; stored as the V entry's first 64 bytes */
pxor %xmm4, %xmm0
pxor %xmm5, %xmm1
movdqa %xmm0, 0(%edx)
movdqa %xmm1, 16(%edx)
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm2, 32(%edx)
movdqa %xmm3, 48(%edx)
/* second half goes into the V entry unchanged */
movdqa %xmm4, 64(%edx)
movdqa %xmm5, 80(%edx)
movdqa %xmm6, 96(%edx)
movdqa %xmm7, 112(%edx)
salsa8_core_sse2
/* feed-forward: add the Salsa input, which was just saved in V */
paddd 0(%edx), %xmm0
paddd 16(%edx), %xmm1
paddd 32(%edx), %xmm2
paddd 48(%edx), %xmm3
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
/* second half ^= new first half, then mix it */
pxor 64(%esp), %xmm0
pxor 80(%esp), %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
salsa8_core_sse2
paddd 64(%esp), %xmm0
paddd 80(%esp), %xmm1
paddd %xmm2, %xmm6
paddd %xmm3, %xmm7
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
addl $128, %edx
cmpl %ecx, %edx
jne scrypt_core_sse2_loop1
/* Loop 2 setup: xmm4..7 = second half, ecx = N (counter), eax = N - 1 (mask) */
movdqa 64(%esp), %xmm4
movdqa 80(%esp), %xmm5
movl 28(%ebp), %ecx
movl %ecx, %eax
subl $1, %eax
/* Loop 2: N data-dependent lookups into V. */
scrypt_core_sse2_loop2:
/* edx = (first word of the second half & (N - 1)) * 128 */
movd %xmm4, %edx
movdqa 0(%esp), %xmm0
movdqa 16(%esp), %xmm1
movdqa 32(%esp), %xmm2
movdqa 48(%esp), %xmm3
andl %eax, %edx
shll $7, %edx
/* first half ^= V entry's pre-xored half ^ current second half */
pxor 0(%esi, %edx), %xmm0
pxor 16(%esi, %edx), %xmm1
pxor 32(%esi, %edx), %xmm2
pxor 48(%esi, %edx), %xmm3
pxor %xmm4, %xmm0
pxor %xmm5, %xmm1
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
salsa8_core_sse2
paddd 0(%esp), %xmm0
paddd 16(%esp), %xmm1
paddd 32(%esp), %xmm2
paddd 48(%esp), %xmm3
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
/* second half ^= V entry's second half ^ new first half, then mix it */
pxor 64(%esi, %edx), %xmm0
pxor 80(%esi, %edx), %xmm1
pxor 96(%esi, %edx), %xmm2
pxor 112(%esi, %edx), %xmm3
pxor 64(%esp), %xmm0
pxor 80(%esp), %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
salsa8_core_sse2
paddd 64(%esp), %xmm0
paddd 80(%esp), %xmm1
paddd %xmm2, %xmm6
paddd %xmm3, %xmm7
movdqa %xmm0, %xmm4
movdqa %xmm1, %xmm5
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
/* count down; ja falls through once the counter reaches zero */
subl $1, %ecx
ja scrypt_core_sse2_loop2
/* flush the cached quarter-blocks and undo the shuffle back into X */
movdqa %xmm6, 96(%esp)
movdqa %xmm7, 112(%esp)
scrypt_shuffle %esp, 0, %edi, 0
scrypt_shuffle %esp, 64, %edi, 64
/* drop the aligned frame and restore the registers pushed by scrypt_core */
movl %ebp, %esp
popl %esi
popl %edi
popl %ebp
popl %ebx
ret
#endif
07070100000027000081A4000003E800000064000000015EF4BCA10000607F000000000000000000000000000000000000001800000000cpuminer-2.5.1/scrypt.c/*
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include "cpuminer-config.h"
#include "miner.h"
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
/*
 * SHA-256 padding appended after the last 16 bytes of the 80-byte key:
 * the 0x80 end marker, zero fill, and the message length in bits
 * (0x280 = 640 = 80 * 8).
 */
static const uint32_t keypad[12] = {
	[0]  = 0x80000000,
	[11] = 0x00000280
};
/*
 * SHA-256 padding for the inner PBKDF2 hash: end marker, zero fill, and the
 * length in bits (0x4a0 = 1184 = 8 * (64 + 80 + 4)).
 */
static const uint32_t innerpad[11] = {
	[0]  = 0x80000000,
	[10] = 0x000004a0
};
/*
 * SHA-256 padding for the outer HMAC hash: end marker, zero fill, and the
 * length in bits (0x300 = 768 = 8 * (64 + 32)).
 */
static const uint32_t outerpad[8] = {
	[0] = 0x80000000,
	[7] = 0x00000300
};
/*
 * Last message block of the final PBKDF2 pass: block index 1, the SHA-256
 * end marker, zero fill, and the length in bits
 * (0x620 = 1568 = 8 * (64 + 128 + 4)).
 */
static const uint32_t finalblk[16] = {
	[0]  = 0x00000001,
	[1]  = 0x80000000,
	[15] = 0x00000620
};
/*
 * Set up the HMAC-SHA256 inner and outer states for an 80-byte key.
 *
 * key    - the 80-byte key as 20 words; only words 16..19 are read here.
 * tstate - in: SHA-256 midstate of the first 64 key bytes;
 *          out: state after absorbing the 0x36-padded key hash (inner).
 * ostate - out: state after absorbing the 0x5c-padded key hash (outer).
 */
static inline void HMAC_SHA256_80_init(const uint32_t *key,
	uint32_t *tstate, uint32_t *ostate)
{
	uint32_t khash[8];
	uint32_t block[16];
	int w;

	/* tstate is assumed to contain the midstate of key */
	memcpy(&block[0], &key[16], 4 * sizeof(uint32_t));
	memcpy(&block[4], keypad, 12 * sizeof(uint32_t));
	sha256_transform(tstate, block, 0);
	memcpy(khash, tstate, sizeof(khash));

	/* Outer state: hashed key xor 0x5c.., zero-extended to one block. */
	sha256_init(ostate);
	for (w = 0; w < 16; w++)
		block[w] = (w < 8 ? khash[w] : 0) ^ 0x5c5c5c5c;
	sha256_transform(ostate, block, 0);

	/* Inner state: hashed key xor 0x36.., zero-extended to one block. */
	sha256_init(tstate);
	for (w = 0; w < 16; w++)
		block[w] = (w < 8 ? khash[w] : 0) ^ 0x36363636;
	sha256_transform(tstate, block, 0);
}
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t istate[8], ostate2[8];
uint32_t ibuf[16], obuf[16];
int i, j;
memcpy(istate, tstate, 32);
sha256_transform(istate, salt, 0);
memcpy(ibuf, salt + 16, 16);
memcpy(ibuf + 5, innerpad, 44);
memcpy(obuf + 8, outerpad, 32);
for (i = 0; i < 4; i++) {
memcpy(obuf, istate, 32);
ibuf[4] = i + 1;
sha256_transform(obuf, ibuf, 0);
memcpy(ostate2, ostate, 32);
sha256_transform(ostate2, obuf, 0);
for (j = 0; j < 8; j++)
output[8 * i + j] = swab32(ostate2[j]);
}
}
/*
 * Final PBKDF2 pass: compress a 128-byte salt into a 32-byte result.
 *
 * tstate - inner HMAC state; updated in place.
 * ostate - outer HMAC state; updated in place.
 * salt   - 32 words of salt.
 * output - receives 8 words, each passed through swab32().
 */
static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
	const uint32_t *salt, uint32_t *output)
{
	uint32_t block[16];
	int w;

	/* Inner hash: two salt blocks, then the index/padding block. */
	sha256_transform(tstate, &salt[0], 1);
	sha256_transform(tstate, &salt[16], 1);
	sha256_transform(tstate, finalblk, 0);

	/* Outer hash over the inner digest plus padding. */
	memcpy(&block[0], tstate, 8 * sizeof(uint32_t));
	memcpy(&block[8], outerpad, 8 * sizeof(uint32_t));
	sha256_transform(ostate, block, 0);

	for (w = 0; w < 8; w++)
		output[w] = swab32(ostate[w]);
}
#ifdef HAVE_SHA256_4WAY
/*
 * Four-lane interleaved version of keypad: every word of the scalar table
 * is repeated once per lane (marker 0x80000000, zero fill, length 0x280).
 */
static const uint32_t keypad_4way[4 * 12] = {
	[4 * 0 + 0]  = 0x80000000, [4 * 0 + 1]  = 0x80000000,
	[4 * 0 + 2]  = 0x80000000, [4 * 0 + 3]  = 0x80000000,
	[4 * 11 + 0] = 0x00000280, [4 * 11 + 1] = 0x00000280,
	[4 * 11 + 2] = 0x00000280, [4 * 11 + 3] = 0x00000280
};
/*
 * Four-lane interleaved version of innerpad: every word of the scalar table
 * is repeated once per lane (marker 0x80000000, zero fill, length 0x4a0).
 */
static const uint32_t innerpad_4way[4 * 11] = {
	[4 * 0 + 0]  = 0x80000000, [4 * 0 + 1]  = 0x80000000,
	[4 * 0 + 2]  = 0x80000000, [4 * 0 + 3]  = 0x80000000,
	[4 * 10 + 0] = 0x000004a0, [4 * 10 + 1] = 0x000004a0,
	[4 * 10 + 2] = 0x000004a0, [4 * 10 + 3] = 0x000004a0
};
/*
 * Four-lane interleaved version of outerpad: every word of the scalar table
 * is repeated once per lane (marker 0x80000000, zero fill, length 0x300).
 */
static const uint32_t outerpad_4way[4 * 8] = {
	[4 * 0 + 0] = 0x80000000, [4 * 0 + 1] = 0x80000000,
	[4 * 0 + 2] = 0x80000000, [4 * 0 + 3] = 0x80000000,
	[4 * 7 + 0] = 0x00000300, [4 * 7 + 1] = 0x00000300,
	[4 * 7 + 2] = 0x00000300, [4 * 7 + 3] = 0x00000300
};
/*
 * Four-lane interleaved version of finalblk (block index 1, marker
 * 0x80000000, zero fill, length 0x620).  Kept 16-byte aligned because it is
 * handed directly to sha256_transform_4way().
 */
static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = {
	[4 * 0 + 0]  = 0x00000001, [4 * 0 + 1]  = 0x00000001,
	[4 * 0 + 2]  = 0x00000001, [4 * 0 + 3]  = 0x00000001,
	[4 * 1 + 0]  = 0x80000000, [4 * 1 + 1]  = 0x80000000,
	[4 * 1 + 2]  = 0x80000000, [4 * 1 + 3]  = 0x80000000,
	[4 * 15 + 0] = 0x00000620, [4 * 15 + 1] = 0x00000620,
	[4 * 15 + 2] = 0x00000620, [4 * 15 + 3] = 0x00000620
};
/*
 * Four-lane variant of HMAC_SHA256_80_init.  All buffers are word
 * interleaved (word w of lane k lives at index 4 * w + k).
 *
 * key    - 4 x 20 interleaved key words; only words 16..19 are read here.
 * tstate - in: midstates of the first 64 key bytes; out: inner HMAC states.
 * ostate - out: outer HMAC states.
 */
static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
	uint32_t *tstate, uint32_t *ostate)
{
	uint32_t khash[4 * 8] __attribute__((aligned(16)));
	uint32_t block[4 * 16] __attribute__((aligned(16)));
	int w;

	/* tstate is assumed to contain the midstate of key */
	memcpy(&block[0], &key[4 * 16], 4 * 4 * sizeof(uint32_t));
	memcpy(&block[4 * 4], keypad_4way, 4 * 12 * sizeof(uint32_t));
	sha256_transform_4way(tstate, block, 0);
	memcpy(khash, tstate, sizeof(khash));

	/* Outer states: hashed key xor 0x5c.., zero-extended to one block. */
	sha256_init_4way(ostate);
	for (w = 0; w < 4 * 16; w++)
		block[w] = (w < 4 * 8 ? khash[w] : 0) ^ 0x5c5c5c5c;
	sha256_transform_4way(ostate, block, 0);

	/* Inner states: hashed key xor 0x36.., zero-extended to one block. */
	sha256_init_4way(tstate);
	for (w = 0; w < 4 * 16; w++)
		block[w] = (w < 4 * 8 ? khash[w] : 0) ^ 0x36363636;
	sha256_transform_4way(tstate, block, 0);
}
/*
 * Four-lane variant of PBKDF2_SHA256_80_128 on word-interleaved buffers.
 *
 * tstate - inner HMAC states (not modified).
 * ostate - outer HMAC states (not modified).
 * salt   - 4 x 20 interleaved salt words.
 * output - receives 4 x 32 interleaved words, each passed through swab32().
 *
 * salt is fully consumed before the first store to output, so the caller
 * may pass the same buffer for both (scrypt_1024_1_1_256_4way does).
 */
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t inner[4 * 8] __attribute__((aligned(16)));
	uint32_t outer[4 * 8] __attribute__((aligned(16)));
	uint32_t tail[4 * 16] __attribute__((aligned(16)));
	uint32_t digest[4 * 16] __attribute__((aligned(16)));
	int blk, lane, w;

	/* Absorb the first 64 salt bytes per lane once. */
	memcpy(inner, tstate, sizeof(inner));
	sha256_transform_4way(inner, salt, 0);

	/* tail = salt[16..19] | block index (filled in below) | padding */
	memcpy(&tail[0], &salt[4 * 16], 4 * 4 * sizeof(uint32_t));
	memcpy(&tail[4 * 5], innerpad_4way, 4 * 11 * sizeof(uint32_t));
	memcpy(&digest[4 * 8], outerpad_4way, 4 * 8 * sizeof(uint32_t));

	for (blk = 0; blk < 4; blk++) {
		uint32_t *dst = output + 4 * 8 * blk;

		memcpy(digest, inner, 4 * 8 * sizeof(uint32_t));
		for (lane = 0; lane < 4; lane++)
			tail[4 * 4 + lane] = blk + 1;
		sha256_transform_4way(digest, tail, 0);

		memcpy(outer, ostate, sizeof(outer));
		sha256_transform_4way(outer, digest, 0);

		for (w = 0; w < 4 * 8; w++)
			dst[w] = swab32(outer[w]);
	}
}
/*
 * Four-lane variant of PBKDF2_SHA256_128_32 on word-interleaved buffers.
 *
 * tstate - inner HMAC states; updated in place.
 * ostate - outer HMAC states; updated in place.
 * salt   - 4 x 32 interleaved salt words.
 * output - receives 4 x 8 interleaved words, each passed through swab32().
 *
 * salt is fully consumed before the first store to output, so both may
 * refer to the same buffer.
 */
static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
	uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t block[4 * 16] __attribute__((aligned(16)));
	int w;

	/* Inner hash: two salt blocks, then the index/padding block. */
	sha256_transform_4way(tstate, &salt[0], 1);
	sha256_transform_4way(tstate, &salt[4 * 16], 1);
	sha256_transform_4way(tstate, finalblk_4way, 0);

	/* Outer hash over the inner digests plus padding. */
	memcpy(&block[0], tstate, 4 * 8 * sizeof(uint32_t));
	memcpy(&block[4 * 8], outerpad_4way, 4 * 8 * sizeof(uint32_t));
	sha256_transform_4way(ostate, block, 0);

	for (w = 0; w < 4 * 8; w++)
		output[w] = swab32(ostate[w]);
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SHA256_8WAY
/*
 * Eight-lane interleaved version of finalblk (block index 1, marker
 * 0x80000000, zero fill, length 0x620).  Kept 32-byte aligned because it is
 * handed directly to sha256_transform_8way().
 */
static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = {
	[8 * 0 + 0]  = 0x00000001, [8 * 0 + 1]  = 0x00000001,
	[8 * 0 + 2]  = 0x00000001, [8 * 0 + 3]  = 0x00000001,
	[8 * 0 + 4]  = 0x00000001, [8 * 0 + 5]  = 0x00000001,
	[8 * 0 + 6]  = 0x00000001, [8 * 0 + 7]  = 0x00000001,
	[8 * 1 + 0]  = 0x80000000, [8 * 1 + 1]  = 0x80000000,
	[8 * 1 + 2]  = 0x80000000, [8 * 1 + 3]  = 0x80000000,
	[8 * 1 + 4]  = 0x80000000, [8 * 1 + 5]  = 0x80000000,
	[8 * 1 + 6]  = 0x80000000, [8 * 1 + 7]  = 0x80000000,
	[8 * 15 + 0] = 0x00000620, [8 * 15 + 1] = 0x00000620,
	[8 * 15 + 2] = 0x00000620, [8 * 15 + 3] = 0x00000620,
	[8 * 15 + 4] = 0x00000620, [8 * 15 + 5] = 0x00000620,
	[8 * 15 + 6] = 0x00000620, [8 * 15 + 7] = 0x00000620
};
/*
 * Eight-lane variant of HMAC_SHA256_80_init.  All buffers are word
 * interleaved (word w of lane k lives at index 8 * w + k).  The SHA-256
 * padding is generated in place instead of being copied from a table.
 *
 * key    - 8 x 20 interleaved key words; only words 16..19 are read here.
 * tstate - in: midstates of the first 64 key bytes; out: inner HMAC states.
 * ostate - out: outer HMAC states.
 */
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
	uint32_t *tstate, uint32_t *ostate)
{
	uint32_t khash[8 * 8] __attribute__((aligned(32)));
	uint32_t block[8 * 16] __attribute__((aligned(32)));
	int lane, w;

	/* tstate is assumed to contain the midstate of key */
	memcpy(&block[0], &key[8 * 16], 8 * 4 * sizeof(uint32_t));
	/* padding: end marker, zero fill, bit length 0x280 */
	for (lane = 0; lane < 8; lane++) {
		block[8 * 4 + lane] = 0x80000000;
		block[8 * 15 + lane] = 0x00000280;
	}
	memset(&block[8 * 5], 0x00, 8 * 10 * sizeof(uint32_t));
	sha256_transform_8way(tstate, block, 0);
	memcpy(khash, tstate, sizeof(khash));

	/* Outer states: hashed key xor 0x5c.., zero-extended to one block. */
	sha256_init_8way(ostate);
	for (w = 0; w < 8 * 16; w++)
		block[w] = (w < 8 * 8 ? khash[w] : 0) ^ 0x5c5c5c5c;
	sha256_transform_8way(ostate, block, 0);

	/* Inner states: hashed key xor 0x36.., zero-extended to one block. */
	sha256_init_8way(tstate);
	for (w = 0; w < 8 * 16; w++)
		block[w] = (w < 8 * 8 ? khash[w] : 0) ^ 0x36363636;
	sha256_transform_8way(tstate, block, 0);
}
/*
 * Eight-lane variant of PBKDF2_SHA256_80_128 on word-interleaved buffers.
 *
 * tstate - inner HMAC states (not modified).
 * ostate - outer HMAC states (not modified).
 * salt   - 8 x 20 interleaved salt words.
 * output - receives 8 x 32 interleaved words, each passed through swab32().
 *
 * salt is fully consumed before the first store to output, so both may
 * refer to the same buffer.
 */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t inner[8 * 8] __attribute__((aligned(32)));
	uint32_t outer[8 * 8] __attribute__((aligned(32)));
	uint32_t tail[8 * 16] __attribute__((aligned(32)));
	uint32_t digest[8 * 16] __attribute__((aligned(32)));
	int blk, lane, w;

	/* Absorb the first 64 salt bytes per lane once. */
	memcpy(inner, tstate, sizeof(inner));
	sha256_transform_8way(inner, salt, 0);

	/* tail = salt[16..19] | block index (filled in below) | padding */
	memcpy(&tail[0], &salt[8 * 16], 8 * 4 * sizeof(uint32_t));
	for (lane = 0; lane < 8; lane++) {
		tail[8 * 5 + lane] = 0x80000000;
		tail[8 * 15 + lane] = 0x000004a0;
		digest[8 * 8 + lane] = 0x80000000;
		digest[8 * 15 + lane] = 0x00000300;
	}
	memset(&tail[8 * 6], 0x00, 8 * 9 * sizeof(uint32_t));
	memset(&digest[8 * 9], 0x00, 8 * 6 * sizeof(uint32_t));

	for (blk = 0; blk < 4; blk++) {
		uint32_t *dst = output + 8 * 8 * blk;

		memcpy(digest, inner, 8 * 8 * sizeof(uint32_t));
		for (lane = 0; lane < 8; lane++)
			tail[8 * 4 + lane] = blk + 1;
		sha256_transform_8way(digest, tail, 0);

		memcpy(outer, ostate, sizeof(outer));
		sha256_transform_8way(outer, digest, 0);

		for (w = 0; w < 8 * 8; w++)
			dst[w] = swab32(outer[w]);
	}
}
/*
 * Eight-lane variant of PBKDF2_SHA256_128_32 on word-interleaved buffers.
 *
 * tstate - inner HMAC states; updated in place.
 * ostate - outer HMAC states; updated in place.
 * salt   - 8 x 32 interleaved salt words.
 * output - receives 8 x 8 interleaved words, each passed through swab32().
 *
 * salt is fully consumed before the first store to output, so both may
 * refer to the same buffer.
 */
static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
	uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t block[8 * 16] __attribute__((aligned(32)));
	int lane, w;

	/* Inner hash: two salt blocks, then the index/padding block. */
	sha256_transform_8way(tstate, &salt[0], 1);
	sha256_transform_8way(tstate, &salt[8 * 16], 1);
	sha256_transform_8way(tstate, finalblk_8way, 0);

	/* Outer hash over the inner digests plus generated padding. */
	memcpy(&block[0], tstate, 8 * 8 * sizeof(uint32_t));
	for (lane = 0; lane < 8; lane++) {
		block[8 * 8 + lane] = 0x80000000;
		block[8 * 15 + lane] = 0x00000300;
	}
	memset(&block[8 * 9], 0x00, 8 * 6 * sizeof(uint32_t));
	sha256_transform_8way(ostate, block, 0);

	for (w = 0; w < 8 * 8; w++)
		output[w] = swab32(ostate[w]);
}
#endif /* HAVE_SHA256_8WAY */
/*
 * Platform selection.  Each branch declares the assembly implementation of
 * scrypt_core() available for that target and defines:
 *   SCRYPT_MAX_WAYS          - multiplier used by scrypt_buffer_alloc() to
 *                              size the scratchpad
 *   scrypt_best_throughput() - a function on x86-64, a constant macro
 *                              elsewhere
 *   HAVE_SCRYPT_3WAY / _6WAY - set when the multi-way cores exist
 * When no branch matches, the portable C implementation after #else is used.
 */
#if defined(USE_ASM) && defined(__x86_64__)
/* x86-64: 1-way and 3-way cores, up to 12 ways */
#define SCRYPT_MAX_WAYS 12
#define HAVE_SCRYPT_3WAY 1
int scrypt_best_throughput();
void scrypt_core(uint32_t *X, uint32_t *V, int N);
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
#if defined(USE_AVX2)
/* AVX2 adds a 6-way core and doubles the maximum to 24 ways */
#undef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 24
#define HAVE_SCRYPT_6WAY 1
void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
#endif
#elif defined(USE_ASM) && defined(__i386__)
/* i386: single-way core only (scrypt-x86.S) */
#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V, int N);
#elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
/* ARM: single-way core; with NEON a 3-way core replaces 4-way SHA-256 */
void scrypt_core(uint32_t *X, uint32_t *V, int N);
#if defined(__ARM_NEON__)
#undef HAVE_SHA256_4WAY
#define SCRYPT_MAX_WAYS 3
#define HAVE_SCRYPT_3WAY 1
#define scrypt_best_throughput() 3
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
#endif
#elif defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
/* PowerPC: single-way core only */
#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V, int N);
#else
/*
 * B ^= Bx, then B += Salsa20/8(B): xor the 64-byte block Bx into B and add
 * the result of eight Salsa20 rounds (four double rounds) on the xored
 * block back onto it.
 *
 * B  - 16 words, updated in place.
 * Bx - 16 words, read only.
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))
/* One Salsa20 quarter-round on the state words a, b, c, d. */
#define QUARTER(a, b, c, d) do { \
		x[b] ^= ROTL32(x[a] + x[d],  7); \
		x[c] ^= ROTL32(x[b] + x[a],  9); \
		x[d] ^= ROTL32(x[c] + x[b], 13); \
		x[a] ^= ROTL32(x[d] + x[c], 18); \
	} while (0)
	uint32_t x[16];
	int round, w;

	for (w = 0; w < 16; w++)
		x[w] = (B[w] ^= Bx[w]);

	for (round = 0; round < 8; round += 2) {
		/* Operate on columns. */
		QUARTER( 0,  4,  8, 12);
		QUARTER( 5,  9, 13,  1);
		QUARTER(10, 14,  2,  6);
		QUARTER(15,  3,  7, 11);
		/* Operate on rows. */
		QUARTER( 0,  1,  2,  3);
		QUARTER( 5,  6,  7,  4);
		QUARTER(10, 11,  8,  9);
		QUARTER(15, 12, 13, 14);
	}

	/* Feed-forward: add the mixed state onto the xored input. */
	for (w = 0; w < 16; w++)
		B[w] += x[w];
#undef QUARTER
#undef ROTL32
}
/*
 * Portable scrypt core (ROMix with r = 1) used when no assembly
 * implementation is selected.
 *
 * X - 32-word (128-byte) working block, updated in place.
 * V - scratchpad with room for N blocks of 32 words.
 * N - number of scratchpad entries; the index mask (N - 1) only selects
 *     uniformly when N is a power of two.
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
{
	uint32_t step, base, w;

	/* Phase 1: record X in V[step], then mix both halves of X. */
	for (step = 0; step < N; step++) {
		memcpy(V + step * 32, X, 32 * sizeof(uint32_t));
		xor_salsa8(X, X + 16);
		xor_salsa8(X + 16, X);
	}

	/* Phase 2: N lookups selected by the first word of the second half. */
	for (step = 0; step < N; step++) {
		base = 32 * (X[16] & (N - 1));
		for (w = 0; w < 32; w++)
			X[w] ^= V[base + w];
		xor_salsa8(X, X + 16);
		xor_salsa8(X + 16, X);
	}
}
#endif
/*
 * Defaults for targets where the platform selection above defined no
 * multi-way limit: one hash at a time.
 */
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif
/*
 * Allocate the scrypt scratchpad for parameter N.
 *
 * The buffer holds N * SCRYPT_MAX_WAYS blocks of 128 bytes plus 63 bytes of
 * slack so that users can round the pointer up to a 64-byte boundary.
 *
 * Returns NULL when the allocation fails, when N is negative, or when the
 * requested size does not fit in size_t.  Without the size check the
 * multiplication could wrap (e.g. N = 2^23 with 4 ways on a 32-bit target
 * yields a 63-byte buffer) and the scrypt core would write past the end.
 * The caller owns the buffer and releases it with free().
 */
unsigned char *scrypt_buffer_alloc(int N)
{
	const size_t bytes_per_n = (size_t)SCRYPT_MAX_WAYS * 128;

	if (N < 0 || (size_t)N > (SIZE_MAX - 63) / bytes_per_n)
		return NULL;

	return malloc((size_t)N * bytes_per_n + 63);
}
/*
 * Compute one scrypt hash (r = 1, p = 1, 32-byte output) of an 80-byte
 * input.
 *
 * input      - 20 words of input.
 * output     - receives 8 words.
 * midstate   - SHA-256 midstate of the first 64 input bytes (8 words).
 * scratchpad - buffer from scrypt_buffer_alloc(); rounded up to a 64-byte
 *              boundary before use.
 * N          - scrypt cost parameter passed to scrypt_core().
 */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
	uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[8], ostate[8];
	uint32_t X[32] __attribute__((aligned(128)));
	const uintptr_t align_mask = (uintptr_t)(63);
	uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + align_mask) & ~align_mask);

	memcpy(tstate, midstate, sizeof(tstate));
	HMAC_SHA256_80_init(input, tstate, ostate);

	PBKDF2_SHA256_80_128(tstate, ostate, input, X);
	scrypt_core(X, V, N);
	PBKDF2_SHA256_128_32(tstate, ostate, X, output);
}
#ifdef HAVE_SHA256_4WAY
/*
 * Compute four scrypt hashes at once using the 4-way SHA-256 routines and
 * the single-way scrypt core.
 *
 * input      - 4 x 20 words, one 80-byte input after another.
 * output     - receives 4 x 8 words, one hash after another.
 * midstate   - SHA-256 midstate (8 words) shared by all four inputs.
 * scratchpad - buffer from scrypt_buffer_alloc(); rounded up to a 64-byte
 *              boundary and reused for each of the four core runs.
 * N          - scrypt cost parameter passed to scrypt_core().
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[4 * 8] __attribute__((aligned(128)));
	uint32_t ostate[4 * 8] __attribute__((aligned(128)));
	uint32_t W[4 * 32] __attribute__((aligned(128)));
	uint32_t X[4 * 32] __attribute__((aligned(128)));
	const uintptr_t align_mask = (uintptr_t)(63);
	uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + align_mask) & ~align_mask);
	int lane, w;

	/* Interleave the four inputs word by word for the 4-way SHA-256. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 20; w++)
			W[4 * w + lane] = input[lane * 20 + w];
	/* Every lane starts from the same midstate. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 8; w++)
			tstate[4 * w + lane] = midstate[w];

	HMAC_SHA256_80_init_4way(W, tstate, ostate);
	PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

	/* De-interleave into four contiguous 128-byte blocks for the core. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 32; w++)
			X[lane * 32 + w] = W[4 * w + lane];

	for (lane = 0; lane < 4; lane++)
		scrypt_core(X + lane * 32, V, N);

	/* Back to the interleaved layout for the final PBKDF2 pass. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 32; w++)
			W[4 * w + lane] = X[lane * 32 + w];

	PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 8; w++)
			output[lane * 8 + w] = W[4 * w + lane];
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SCRYPT_3WAY
/*
 * Three scrypt hashes at once: scalar SHA-256 helpers per way, one
 * scrypt_core_3way call for the memory-hard part.
 *
 * input holds three consecutive 20-word headers, output receives three
 * consecutive 8-word results. Each way has its own 8-word slice of
 * tstate/ostate and its own 32-word slice of X.
 */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[3 * 8], ostate[3 * 8];
	uint32_t X[3 * 32] __attribute__((aligned(64)));
	uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
	int way;

	/* Every way starts from the same midstate. */
	for (way = 0; way < 3; way++)
		memcpy(tstate + 8 * way, midstate, 32);

	for (way = 0; way < 3; way++)
		HMAC_SHA256_80_init(input + 20 * way, tstate + 8 * way,
			ostate + 8 * way);

	for (way = 0; way < 3; way++)
		PBKDF2_SHA256_80_128(tstate + 8 * way, ostate + 8 * way,
			input + 20 * way, X + 32 * way);

	scrypt_core_3way(X, V, N);

	for (way = 0; way < 3; way++)
		PBKDF2_SHA256_128_32(tstate + 8 * way, ostate + 8 * way,
			X + 32 * way, output + 8 * way);
}
#ifdef HAVE_SHA256_4WAY
/*
 * Twelve scrypt hashes at once: three groups of four lanes go through the
 * 4-way SHA-256 helpers, and the memory-hard part runs as four
 * scrypt_core_3way calls over consecutive 96-word slices of X.
 *
 * input holds twelve consecutive 20-word headers, output receives twelve
 * consecutive 8-word results. Inside a group the 4-way helpers use the
 * interleaved layout (element 4 * word + lane); X stores one contiguous
 * 32-word block per hash.
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[12 * 8] __attribute__((aligned(128)));
	uint32_t ostate[12 * 8] __attribute__((aligned(128)));
	uint32_t W[12 * 32] __attribute__((aligned(128)));
	uint32_t X[12 * 32] __attribute__((aligned(128)));
	uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
	int grp, lane, word;

	/* Interleave the headers of each group and replicate the midstate. */
	for (grp = 0; grp < 3; grp++) {
		for (lane = 0; lane < 4; lane++) {
			for (word = 0; word < 20; word++)
				W[128 * grp + 4 * word + lane] =
					input[80 * grp + lane * 20 + word];
			for (word = 0; word < 8; word++)
				tstate[32 * grp + 4 * word + lane] = midstate[word];
		}
	}

	for (grp = 0; grp < 3; grp++)
		HMAC_SHA256_80_init_4way(W + 128 * grp, tstate + 32 * grp,
			ostate + 32 * grp);
	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_80_128_4way(tstate + 32 * grp, ostate + 32 * grp,
			W + 128 * grp, W + 128 * grp);

	/* De-interleave: one contiguous 32-word block per hash. */
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (word = 0; word < 32; word++)
				X[128 * grp + lane * 32 + word] =
					W[128 * grp + 4 * word + lane];

	/* Four 3-way core passes cover the twelve blocks. */
	for (grp = 0; grp < 4; grp++)
		scrypt_core_3way(X + grp * 96, V, N);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (word = 0; word < 32; word++)
				W[128 * grp + 4 * word + lane] =
					X[128 * grp + lane * 32 + word];

	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_128_32_4way(tstate + 32 * grp, ostate + 32 * grp,
			W + 128 * grp, W + 128 * grp);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (word = 0; word < 8; word++)
				output[32 * grp + lane * 8 + word] =
					W[128 * grp + 4 * word + lane];
}
#endif /* HAVE_SHA256_4WAY */
#endif /* HAVE_SCRYPT_3WAY */
#ifdef HAVE_SCRYPT_6WAY
/*
 * Twenty-four scrypt hashes at once: three groups of eight lanes go through
 * the 8-way SHA-256 helpers, and the memory-hard part runs as four
 * scrypt_core_6way calls over consecutive slices of six 32-word blocks.
 *
 * input holds twenty-four consecutive 20-word headers, output receives
 * twenty-four consecutive 8-word results. Inside a group the 8-way helpers
 * use the interleaved layout (element 8 * word + lane); X stores one
 * contiguous 32-word block per hash.
 */
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[24 * 8] __attribute__((aligned(128)));
	uint32_t ostate[24 * 8] __attribute__((aligned(128)));
	uint32_t W[24 * 32] __attribute__((aligned(128)));
	uint32_t X[24 * 32] __attribute__((aligned(128)));
	uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
	int grp, lane, word;

	/* Interleave the headers of each group and replicate the midstate. */
	for (grp = 0; grp < 3; grp++) {
		for (lane = 0; lane < 8; lane++) {
			for (word = 0; word < 20; word++)
				W[8 * 32 * grp + 8 * word + lane] =
					input[8 * 20 * grp + lane * 20 + word];
			for (word = 0; word < 8; word++)
				tstate[8 * 8 * grp + 8 * word + lane] = midstate[word];
		}
	}

	for (grp = 0; grp < 3; grp++)
		HMAC_SHA256_80_init_8way(W + 256 * grp, tstate + 64 * grp,
			ostate + 64 * grp);
	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_80_128_8way(tstate + 64 * grp, ostate + 64 * grp,
			W + 256 * grp, W + 256 * grp);

	/* De-interleave: one contiguous 32-word block per hash. */
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (word = 0; word < 32; word++)
				X[8 * 32 * grp + lane * 32 + word] =
					W[8 * 32 * grp + 8 * word + lane];

	/* Four 6-way core passes cover the twenty-four blocks. */
	for (grp = 0; grp < 4; grp++)
		scrypt_core_6way(X + grp * 6 * 32, V, N);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (word = 0; word < 32; word++)
				W[8 * 32 * grp + 8 * word + lane] =
					X[8 * 32 * grp + lane * 32 + word];

	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_128_32_8way(tstate + 64 * grp, ostate + 64 * grp,
			W + 256 * grp, W + 256 * grp);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (word = 0; word < 8; word++)
				output[8 * 8 * grp + lane * 8 + word] =
					W[8 * 32 * grp + 8 * word + lane];
}
#endif /* HAVE_SCRYPT_6WAY */
/*
 * Scan nonces for a scrypt hash that meets the target.
 *
 * thr_id:      index into work_restart[] for this thread
 * pdata:       20-word header; word 19 is the nonce. On return it holds the
 *              winning nonce (return 1) or the last nonce tried (return 0).
 * scratchbuf:  scratchpad handed to the scrypt_1024_1_1_256* routines
 * ptarget:     target; word 7 is used as a quick pre-check before fulltest()
 * max_nonce:   scanning stops once the running nonce reaches this value
 * hashes_done: receives the number of nonces consumed
 * N:           scrypt parameter passed through to the hash routines
 *
 * Each iteration fills `throughput` copies of the header with consecutive
 * nonces and hashes them with the widest routine compiled in for that count.
 */
int scanhash_scrypt(int thr_id, uint32_t *pdata,
	unsigned char *scratchbuf, const uint32_t *ptarget,
	uint32_t max_nonce, unsigned long *hashes_done, int N)
{
	uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
	uint32_t midstate[8];
	uint32_t n = pdata[19] - 1;
	const uint32_t Htarg = ptarget[7];
	int throughput = scrypt_best_throughput();
	int way;

#ifdef HAVE_SHA256_4WAY
	if (sha256_use_4way())
		throughput *= 4;
#endif

	/* One private copy of the header per way. */
	for (way = 0; way < throughput; way++)
		memcpy(data + way * 20, pdata, 80);

	/* The first 64 bytes do not depend on the nonce: hash them once. */
	sha256_init(midstate);
	sha256_transform(midstate, data, 0);

	for (;;) {
		for (way = 0; way < throughput; way++)
			data[way * 20 + 19] = ++n;

		switch (throughput) {
#if defined(HAVE_SHA256_4WAY)
		case 4:
			scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf, N);
			break;
#endif
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
		case 12:
			scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf, N);
			break;
#endif
#if defined(HAVE_SCRYPT_6WAY)
		case 24:
			scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf, N);
			break;
#endif
#if defined(HAVE_SCRYPT_3WAY)
		case 3:
			scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf, N);
			break;
#endif
		default:
			scrypt_1024_1_1_256(data, hash, midstate, scratchbuf, N);
			break;
		}

		for (way = 0; way < throughput; way++) {
			const uint32_t *candidate = hash + way * 8;

			/* Cheap top-word check first, full comparison only on a hit. */
			if (candidate[7] > Htarg)
				continue;
			if (!fulltest(candidate, ptarget))
				continue;
			*hashes_done = n - pdata[19] + 1;
			pdata[19] = data[way * 20 + 19];
			return 1;
		}

		if (n >= max_nonce || work_restart[thr_id].restart)
			break;
	}

	*hashes_done = n - pdata[19] + 1;
	pdata[19] = n;
	return 0;
}
07070100000028000081A4000003E800000064000000015EF4BCA10000A37A000000000000000000000000000000000000001A00000000cpuminer-2.5.1/sha2-arm.S/*
* Copyright 2012 pooler@litecoinpool.org
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
/*
 * sha256_k: emit a word-aligned table of the 64 SHA-256 round constants
 * K[0..63]. It is expanded right after a label (sha256_transform_k,
 * sha256d_ms_k) so that sha256_main_round can load K[i] with
 * "ldr r3, <label> + i*4".
 */
.macro sha256_k
.align 2
.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.endm
/*
 * sha256_extend_doubleround_core i, rw, ra, rb, ry, rz
 *
 * Shared middle part of the head/body/foot wrappers: computes two
 * consecutive message-schedule words. With
 *   s0(x) = ror(x,7) ^ ror(x,18) ^ (x >> 3)
 *   s1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10)
 * it performs
 *   \ra = s0(lr) + s1(\ry) + r11 + \ra    (stored at [\rw + (\i+16)*4])
 *   \rb = s0(w)  + s1(\rz) + lr  + \rb    (left in the register only)
 * where w is the word loaded from [\rw + (\i+2)*4] and lr is its value on
 * entry.
 * On exit r11 holds w; r12 and lr are scratch. The two computations are
 * interleaved instruction by instruction.
 */
.macro sha256_extend_doubleround_core i, rw, ra, rb, ry, rz
mov r12, \ry, ror #17
add r11, r11, \ra
eor r12, r12, \ry, ror #19
mov \ra, lr, ror #7
eor r12, r12, \ry, lsr #10
eor \ra, \ra, lr, ror #18
add r12, r12, r11
/* r11 has been consumed; reload it with the word at index i+2. */
ldr r11, [\rw, #(\i+2)*4]
eor \ra, \ra, lr, lsr #3
add \ra, \ra, r12
/* Second word: same pattern with \rz, the reloaded r11 and lr + \rb. */
mov r12, \rz, ror #17
str \ra, [\rw, #(\i+16)*4]
add lr, lr, \rb
eor r12, r12, \rz, ror #19
mov \rb, r11, ror #7
eor r12, r12, \rz, lsr #10
eor \rb, \rb, r11, ror #18
add lr, lr, r12
eor \rb, \rb, r11, lsr #3
add \rb, \rb, lr
.endm
/*
 * sha256_extend_doubleround_head: first double round of an extension chain.
 * Loads lr from word i+1 before running the core (nothing is pending from a
 * previous round), then preloads lr with word i+3 for the round that follows.
 */
.macro sha256_extend_doubleround_head i, rw, ra, rb, ry, rz
ldr lr, [\rw, #(\i+1)*4]
sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz
ldr lr, [\rw, #(\i+3)*4]
.endm
/*
 * sha256_extend_doubleround_body: middle double round of an extension chain.
 * Stores \rz at word i+15 first (in the invocation sequences this is the
 * register the previous round passed as \rb and left unstored), runs the
 * core, then preloads lr with word i+3 for the next round.
 */
.macro sha256_extend_doubleround_body i, rw, ra, rb, ry, rz
str \rz, [\rw, #(\i+15)*4]
sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz
ldr lr, [\rw, #(\i+3)*4]
.endm
/*
 * sha256_extend_doubleround_foot: last double round of an extension chain.
 * Like the body variant it stores \rz at word i+15 and runs the core, but
 * instead of preloading lr it stores the second result \rb at word i+17,
 * because no later round will do so.
 */
.macro sha256_extend_doubleround_foot i, rw, ra, rb, ry, rz
str \rz, [\rw, #(\i+15)*4]
sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz
str \rb, [\rw, #(\i+17)*4]
.endm
/*
 * sha256_main_round i, ka, rw, ra, rb, rc, rd, re, rf, rg, rh
 *
 * One SHA-256 compression round. \ra..\rh are the registers currently
 * holding the working variables a..h, \rw points at the message schedule and
 * \ka is the label of a sha256_k table.
 *   t    = \rh + ((\rf & \re) | (\rg & ~\re)) + W[i] + K[i]
 *          + (ror(\re,6) ^ ror(\re,11) ^ ror(\re,25))
 *   \rh  = t + \rd
 *   \rd  = t + ((\ra & \rb) ^ ((\ra ^ \rb) & \rc))
 *          + (ror(\ra,2) ^ ror(\ra,13) ^ ror(\ra,22))
 * The other six registers are not modified; callers rotate the register
 * assignment from round to round instead of moving values.
 * Clobbers r3, r12 and lr.
 */
.macro sha256_main_round i, ka, rw, ra, rb, rc, rd, re, rf, rg, rh
ldr r12, [\rw, #(\i)*4]
and r3, \rf, \re
bic lr, \rg, \re
orr lr, lr, r3
/* K[i] is read directly from the table at label \ka. */
ldr r3, \ka + (\i)*4
add \rh, \rh, lr
/* lr = e ^ ror(e,5) ^ ror(e,19); rotating that by 6 yields ror 6/11/25. */
eor lr, \re, \re, ror #5
add \rh, \rh, r12
eor lr, lr, \re, ror #19
add \rh, \rh, r3
eor r3, \ra, \rb
add \rh, \rh, lr, ror #6
and r3, r3, \rc
/* r12 = a ^ ror(a,11) ^ ror(a,20); rotating that by 2 yields ror 2/13/22. */
eor r12, \ra, \ra, ror #11
and lr, \ra, \rb
eor r12, r12, \ra, ror #20
eor lr, lr, r3
add r3, \rh, lr
add \rh, \rh, \rd
add \rd, r3, r12, ror #2
.endm
/*
 * sha256_main_quadround i, ka, rw: rounds i..i+3 with the working variables
 * in r4-r11. Each round rotates the register-to-variable assignment by one
 * position, so after four rounds the assignment is the same as on entry.
 */
.macro sha256_main_quadround i, ka, rw
sha256_main_round \i+0, \ka, \rw, r4, r5, r6, r7, r8, r9, r10, r11
sha256_main_round \i+1, \ka, \rw, r7, r4, r5, r6, r11, r8, r9, r10
sha256_main_round \i+2, \ka, \rw, r6, r7, r4, r5, r10, r11, r8, r9
sha256_main_round \i+3, \ka, \rw, r5, r6, r7, r4, r9, r10, r11, r8
.endm
/*
 * sha256_transform (also exported as _sha256_transform)
 *
 * Register usage as seen in the code:
 *   r0 - pointer to the 8-word hash state; read, then updated in place
 *   r1 - pointer to the 16-word input block
 *   r2 - if non-zero, every input word is byte-swapped while it is copied
 *
 * A 64-word message schedule is built on the stack: the 16 input words are
 * copied (optionally swapped), words 16..63 are derived from them, then 64
 * compression rounds run and the result is added to the state at r0.
 * r4-r11 are saved and restored; r1-r3, r12 and lr are used as scratch.
 */
.text
.code 32
.align 2
.globl sha256_transform
.globl _sha256_transform
#ifdef __ELF__
.type sha256_transform, %function
#endif
sha256_transform:
_sha256_transform:
stmfd sp!, {r4-r11, lr}
cmp r2, #0
/* Reserve 64 words of stack for the message schedule. */
sub sp, sp, #64*4
bne sha256_transform_swap
/* No swap requested: copy the 16 input words unchanged, 8 at a time. */
ldmia r1!, {r4-r11}
stmia sp, {r4-r11}
add r3, sp, #8*4
ldmia r1, {r4-r11}
stmia r3, {r4-r11}
b sha256_transform_extend
/*
 * bswap rd, rn: reverse the byte order of the 32-bit word in \rn and put the
 * result in \rd (which may be the same register). Clobbers r12.
 */
.macro bswap rd, rn
eor r12, \rn, \rn, ror #16
bic r12, r12, #0x00ff0000
mov \rd, \rn, ror #8
eor \rd, \rd, r12, lsr #8
.endm
sha256_transform_swap:
/* Swap requested: same copy, with every word byte-reversed. */
ldmia r1!, {r4-r11}
bswap r4, r4
bswap r5, r5
bswap r6, r6
bswap r7, r7
bswap r8, r8
bswap r9, r9
bswap r10, r10
bswap r11, r11
stmia sp, {r4-r11}
add r3, sp, #8*4
ldmia r1, {r4-r11}
bswap r4, r4
bswap r5, r5
bswap r6, r6
bswap r7, r7
bswap r8, r8
bswap r9, r9
bswap r10, r10
bswap r11, r11
stmia r3, {r4-r11}
sha256_transform_extend:
/*
 * Schedule extension. r11 = word 0 and r4-r10 = words 9..15; the macros
 * fetch the remaining inputs themselves. The register arguments rotate so
 * that each round's results become a later round's \ry/\rz inputs.
 */
add r12, sp, #9*4
ldr r11, [sp, #0*4]
ldmia r12, {r4-r10}
sha256_extend_doubleround_head 0, sp, r4, r5, r9, r10
sha256_extend_doubleround_body 2, sp, r6, r7, r4, r5
sha256_extend_doubleround_body 4, sp, r8, r9, r6, r7
sha256_extend_doubleround_body 6, sp, r10, r4, r8, r9
sha256_extend_doubleround_body 8, sp, r5, r6, r10, r4
sha256_extend_doubleround_body 10, sp, r7, r8, r5, r6
sha256_extend_doubleround_body 12, sp, r9, r10, r7, r8
sha256_extend_doubleround_body 14, sp, r4, r5, r9, r10
sha256_extend_doubleround_body 16, sp, r6, r7, r4, r5
sha256_extend_doubleround_body 18, sp, r8, r9, r6, r7
sha256_extend_doubleround_body 20, sp, r10, r4, r8, r9
sha256_extend_doubleround_body 22, sp, r5, r6, r10, r4
sha256_extend_doubleround_body 24, sp, r7, r8, r5, r6
sha256_extend_doubleround_body 26, sp, r9, r10, r7, r8
sha256_extend_doubleround_body 28, sp, r4, r5, r9, r10
sha256_extend_doubleround_body 30, sp, r6, r7, r4, r5
sha256_extend_doubleround_body 32, sp, r8, r9, r6, r7
sha256_extend_doubleround_body 34, sp, r10, r4, r8, r9
sha256_extend_doubleround_body 36, sp, r5, r6, r10, r4
sha256_extend_doubleround_body 38, sp, r7, r8, r5, r6
sha256_extend_doubleround_body 40, sp, r9, r10, r7, r8
sha256_extend_doubleround_body 42, sp, r4, r5, r9, r10
sha256_extend_doubleround_body 44, sp, r6, r7, r4, r5
sha256_extend_doubleround_foot 46, sp, r8, r9, r6, r7
/* Load the current state into r4-r11 and run the 64 rounds. */
ldmia r0, {r4-r11}
sha256_main_quadround 0, sha256_transform_k, sp
sha256_main_quadround 4, sha256_transform_k, sp
sha256_main_quadround 8, sha256_transform_k, sp
sha256_main_quadround 12, sha256_transform_k, sp
sha256_main_quadround 16, sha256_transform_k, sp
sha256_main_quadround 20, sha256_transform_k, sp
sha256_main_quadround 24, sha256_transform_k, sp
sha256_main_quadround 28, sha256_transform_k, sp
/*
 * The constant table sits in the middle of the round code and is jumped
 * over; presumably this keeps it within range of the label-relative ldr in
 * every round.
 */
b sha256_transform_k_over
sha256_transform_k:
sha256_k
sha256_transform_k_over:
sha256_main_quadround 32, sha256_transform_k, sp
sha256_main_quadround 36, sha256_transform_k, sp
sha256_main_quadround 40, sha256_transform_k, sp
sha256_main_quadround 44, sha256_transform_k, sp
sha256_main_quadround 48, sha256_transform_k, sp
sha256_main_quadround 52, sha256_transform_k, sp
sha256_main_quadround 56, sha256_transform_k, sp
sha256_main_quadround 60, sha256_transform_k, sp
/* Add the previous state to the working variables and write it back. */
ldmia r0, {r1, r2, r3, r12}
add r4, r4, r1
add r5, r5, r2
add r6, r6, r3
add r7, r7, r12
stmia r0!, {r4-r7}
ldmia r0, {r1, r2, r3, r12}
add r8, r8, r1
add r9, r9, r2
add r10, r10, r3
add r11, r11, r12
stmia r0, {r8-r11}
/* Release the schedule buffer and return. */
add sp, sp, #64*4
#ifdef __thumb__
ldmfd sp!, {r4-r11, lr}
bx lr
#else
ldmfd sp!, {r4-r11, pc}
#endif
/*
 * sha256d_ms (also exported as _sha256d_ms): double SHA-256 that reuses
 * precomputed data and ends up writing only word 7 of the result.
 *
 * Register usage as seen in the code:
 *   r0 - output pointer; only [r0 + 7*4] is written (in sha256d_ms_finish)
 *   r1 - 64-word message schedule buffer, updated in place during the first
 *        pass; the words it overwrites from 18,19,20,22,23,24,30,31 are
 *        restored before the second pass
 *   r2 - eight words added to the working variables after the first pass
 *        (presumably the midstate - confirm against the C caller)
 *   r3 - eight words loaded as the working variables on entry to round 3 of
 *        the first pass (presumably a state precomputed by the caller)
 *
 * The same extension and round code is executed twice. The Z flag tells the
 * passes apart: "cmp r0, r0" sets it here, no instruction before the
 * "bne sha256d_ms_extend_coda2" / "bne sha256d_ms_finish" tests changes the
 * flags, so those branches fall through during the first pass.
 *
 * This entry section saves the original words 18,19,20,22,23,24,30,31 of the
 * r1 buffer to sp+18*4 .. sp+25*4 and recomputes words 18..30 in place; the
 * new word 31 is left in r5 for sha256d_ms_extend_loop2 to store.
 */
.text
.code 32
.align 2
.globl sha256d_ms
.globl _sha256d_ms
#ifdef __ELF__
.type sha256d_ms, %function
#endif
sha256d_ms:
_sha256d_ms:
stmfd sp!, {r4-r11, lr}
sub sp, sp, #64*4
/* Set Z: marks the first pass (see header comment). */
cmp r0, r0
ldr lr, [r1, #3*4]
ldr r6, [r1, #18*4]
ldr r7, [r1, #19*4]
/* r12 = ror(lr,7) ^ ror(lr,18) ^ (lr >> 3) of word 3, added into word 18. */
mov r12, lr, ror #7
str r6, [sp, #18*4]
eor r12, r12, lr, ror #18
str r7, [sp, #19*4]
eor r12, r12, lr, lsr #3
ldr r8, [r1, #20*4]
add r6, r6, r12
ldr r10, [r1, #22*4]
/* Word 19 gets word 3 itself added. */
add r7, r7, lr
str r6, [r1, #18*4]
mov r12, r6, ror #17
str r7, [r1, #19*4]
eor r12, r12, r6, ror #19
str r8, [sp, #20*4]
eor r12, r12, r6, lsr #10
ldr r4, [r1, #23*4]
add r8, r8, r12
ldr r5, [r1, #24*4]
mov r9, r7, ror #17
str r8, [r1, #20*4]
eor r9, r9, r7, ror #19
str r10, [sp, #21*4]
eor r9, r9, r7, lsr #10
str r4, [sp, #22*4]
mov r12, r8, ror #17
/* Word 21 is written without being read first. */
str r9, [r1, #21*4]
eor r12, r12, r8, ror #19
str r5, [sp, #23*4]
eor r12, r12, r8, lsr #10
mov lr, r9, ror #17
add r10, r10, r12
ldr r11, [r1, #30*4]
eor lr, lr, r9, ror #19
str r10, [r1, #22*4]
eor lr, lr, r9, lsr #10
str r11, [sp, #24*4]
add r4, r4, lr
mov r12, r10, ror #17
str r4, [r1, #23*4]
eor r12, r12, r10, ror #19
mov lr, r4, ror #17
eor r12, r12, r10, lsr #10
eor lr, lr, r4, ror #19
add r5, r5, r12
eor lr, lr, r4, lsr #10
str r5, [r1, #24*4]
/* Words 25..29 are built from registers already holding words 18..22. */
add r6, r6, lr
mov r12, r5, ror #17
str r6, [r1, #25*4]
eor r12, r12, r5, ror #19
mov lr, r6, ror #17
eor r12, r12, r5, lsr #10
eor lr, lr, r6, ror #19
add r7, r7, r12
eor lr, lr, r6, lsr #10
str r7, [r1, #26*4]
add r8, r8, lr
mov r12, r7, ror #17
str r8, [r1, #27*4]
eor r12, r12, r7, ror #19
mov lr, r8, ror #17
eor r12, r12, r7, lsr #10
eor lr, lr, r8, ror #19
add r9, r9, r12
eor lr, lr, r8, lsr #10
str r9, [r1, #28*4]
add r10, r10, lr
ldr lr, [r1, #31*4]
mov r12, r9, ror #17
str r10, [r1, #29*4]
eor r12, r12, r9, ror #19
str lr, [sp, #25*4]
eor r12, r12, r9, lsr #10
add r11, r11, r12
add r5, r5, lr
mov r12, r10, ror #17
add r4, r4, r11
/* Preload r11 = word 16 and (below) lr = word 17 for the extension macros. */
ldr r11, [r1, #16*4]
eor r12, r12, r10, ror #19
str r4, [r1, #30*4]
eor r12, r12, r10, lsr #10
add r5, r5, r12
ldr lr, [r1, #17*4]
/*
 * sha256d_ms_extend_loop2: schedule extension shared by both passes.
 * Entered with r11 = word 16, lr = word 17, r4 = word 30 and r5 = word 31
 * (not yet stored); the first macro stores r5 at index 31 and the chain then
 * produces words 32 and up in the buffer at r1.
 */
sha256d_ms_extend_loop2:
sha256_extend_doubleround_body 16, r1, r6, r7, r4, r5
sha256_extend_doubleround_body 18, r1, r8, r9, r6, r7
sha256_extend_doubleround_body 20, r1, r10, r4, r8, r9
sha256_extend_doubleround_body 22, r1, r5, r6, r10, r4
sha256_extend_doubleround_body 24, r1, r7, r8, r5, r6
sha256_extend_doubleround_body 26, r1, r9, r10, r7, r8
sha256_extend_doubleround_body 28, r1, r4, r5, r9, r10
sha256_extend_doubleround_body 30, r1, r6, r7, r4, r5
sha256_extend_doubleround_body 32, r1, r8, r9, r6, r7
sha256_extend_doubleround_body 34, r1, r10, r4, r8, r9
sha256_extend_doubleround_body 36, r1, r5, r6, r10, r4
sha256_extend_doubleround_body 38, r1, r7, r8, r5, r6
sha256_extend_doubleround_body 40, r1, r9, r10, r7, r8
sha256_extend_doubleround_body 42, r1, r4, r5, r9, r10
/*
 * Z is still set during the first pass (from "cmp r0, r0"), so this falls
 * through. In the second pass "movs r1, sp" has cleared Z and the shorter
 * coda is taken instead.
 */
bne sha256d_ms_extend_coda2
sha256_extend_doubleround_body 44, r1, r6, r7, r4, r5
sha256_extend_doubleround_foot 46, r1, r8, r9, r6, r7
/*
 * First pass only: load the eight words at r3 as working variables, in the
 * register order expected at round 3, and enter the round code there
 * (rounds 0..2 are skipped).
 */
ldr r4, [r3, #0*4]
ldr r9, [r3, #1*4]
ldr r10, [r3, #2*4]
ldr r11, [r3, #3*4]
ldr r8, [r3, #4*4]
ldr r5, [r3, #5*4]
ldr r6, [r3, #6*4]
ldr r7, [r3, #7*4]
b sha256d_ms_main_loop1
/*
 * Round code of sha256d_ms, shared by both passes, followed by the
 * first-pass epilogue that prepares the second pass.
 *
 * sha256d_ms_main_loop2 - second-pass entry: runs rounds 0..2 first
 * sha256d_ms_main_loop1 - first-pass entry: starts at round 3
 * After round 56 the second pass (Z clear) leaves for sha256d_ms_finish;
 * the first pass (Z set) completes rounds 57..63.
 */
sha256d_ms_main_loop2:
sha256_main_round 0, sha256d_ms_k, r1, r4, r5, r6, r7, r8, r9, r10, r11
sha256_main_round 1, sha256d_ms_k, r1, r7, r4, r5, r6, r11, r8, r9, r10
sha256_main_round 2, sha256d_ms_k, r1, r6, r7, r4, r5, r10, r11, r8, r9
sha256d_ms_main_loop1:
sha256_main_round 3, sha256d_ms_k, r1, r5, r6, r7, r4, r9, r10, r11, r8
sha256_main_quadround 4, sha256d_ms_k, r1
sha256_main_quadround 8, sha256d_ms_k, r1
sha256_main_quadround 12, sha256d_ms_k, r1
sha256_main_quadround 16, sha256d_ms_k, r1
sha256_main_quadround 20, sha256d_ms_k, r1
sha256_main_quadround 24, sha256d_ms_k, r1
sha256_main_quadround 28, sha256d_ms_k, r1
/* Constant table embedded in the code and jumped over. */
b sha256d_ms_k_over
sha256d_ms_k:
sha256_k
sha256d_ms_k_over:
sha256_main_quadround 32, sha256d_ms_k, r1
sha256_main_quadround 36, sha256d_ms_k, r1
sha256_main_quadround 40, sha256d_ms_k, r1
sha256_main_quadround 44, sha256d_ms_k, r1
sha256_main_quadround 48, sha256d_ms_k, r1
sha256_main_quadround 52, sha256d_ms_k, r1
sha256_main_round 56, sha256d_ms_k, r1, r4, r5, r6, r7, r8, r9, r10, r11
/* Second pass: only a reduced tail is needed, handled in sha256d_ms_finish. */
bne sha256d_ms_finish
sha256_main_round 57, sha256d_ms_k, r1, r7, r4, r5, r6, r11, r8, r9, r10
sha256_main_round 58, sha256d_ms_k, r1, r6, r7, r4, r5, r10, r11, r8, r9
sha256_main_round 59, sha256d_ms_k, r1, r5, r6, r7, r4, r9, r10, r11, r8
sha256_main_quadround 60, sha256d_ms_k, r1
/*
 * End of the first pass: add the eight words at r2 to the working variables
 * and store the sums at sp[0..7]; they become words 0..7 of the second-pass
 * message.
 */
ldmia r2!, {r3, r12, lr}
add r4, r4, r3
add r5, r5, r12
add r6, r6, lr
stmia sp, {r4-r6}
ldmia r2, {r3, r4, r5, r6, r12}
add lr, sp, #3*4
add r7, r7, r3
add r8, r8, r4
add r9, r9, r5
add r10, r10, r6
add r11, r11, r12
add r12, sp, #18*4
stmia lr!, {r7-r11}
/*
 * Put back the original words 18,19,20,22,23,24,30,31 of the r1 buffer that
 * the entry code saved at sp+18*4 .. sp+25*4.
 */
ldmia r12, {r4-r11}
str r4, [r1, #18*4]
str r5, [r1, #19*4]
str r6, [r1, #20*4]
str r7, [r1, #22*4]
str r8, [r1, #23*4]
str r9, [r1, #24*4]
str r10, [r1, #30*4]
str r11, [r1, #31*4]
/*
 * lr now points at sp+8*4. Fill words 8..15 of the second-pass message:
 * 0x80000000, six zero words, 0x00000100 (SHA-256 padding for a 256-bit
 * message).
 */
mov r3, #0x80000000
mov r4, #0
mov r5, #0
mov r6, #0
mov r7, #0
mov r8, #0
mov r9, #0
mov r10, #0x00000100
stmia lr, {r3-r10}
ldr lr, [sp, #1*4]
/*
 * From here on the schedule lives on the stack: r1 = sp. The "s" suffix also
 * updates the flags; sp is non-zero, so Z is cleared and marks the second
 * pass for the two bne tests.
 */
movs r1, sp
ldr r4, [sp, #0*4]
ldr r11, [sp, #2*4]
/*
 * Second-pass schedule words 16..31, with the known padding words folded in
 * as immediates. With s0(x) = ror7^ror18^lsr3 and s1(x) = ror17^ror19^lsr10:
 *   0x00a00000              = s1(0x00000100)   (word 15 feeding word 17)
 *   0x11000000 + 0x00002000 = s0(0x80000000)   (word 8 feeding word 23)
 *   0x00400000 + 0x00000022 = s0(0x00000100)   (word 15 feeding word 30)
 *   0x00000100 / 0x80000000 are words 15 / 8 added directly.
 * Terms coming from the zero words 9..14 are simply omitted.
 */
mov r12, lr, ror #7
eor r12, r12, lr, ror #18
add r5, lr, #0x00a00000
eor r12, r12, lr, lsr #3
mov lr, r11, ror #7
add r4, r4, r12
eor lr, lr, r11, ror #18
str r4, [sp, #16*4]
eor lr, lr, r11, lsr #3
mov r12, r4, ror #17
add r5, r5, lr
ldr lr, [sp, #3*4]
str r5, [sp, #17*4]
eor r12, r12, r4, ror #19
mov r6, lr, ror #7
eor r12, r12, r4, lsr #10
eor r6, r6, lr, ror #18
add r11, r11, r12
eor r6, r6, lr, lsr #3
mov r12, r5, ror #17
add r6, r6, r11
ldr r11, [sp, #4*4]
str r6, [sp, #18*4]
eor r12, r12, r5, ror #19
mov r7, r11, ror #7
eor r12, r12, r5, lsr #10
eor r7, r7, r11, ror #18
add lr, lr, r12
eor r7, r7, r11, lsr #3
mov r12, r6, ror #17
add r7, r7, lr
ldr lr, [sp, #5*4]
str r7, [sp, #19*4]
eor r12, r12, r6, ror #19
mov r8, lr, ror #7
eor r12, r12, r6, lsr #10
eor r8, r8, lr, ror #18
add r11, r11, r12
eor r8, r8, lr, lsr #3
mov r12, r7, ror #17
add r8, r8, r11
ldr r11, [sp, #6*4]
str r8, [sp, #20*4]
eor r12, r12, r7, ror #19
mov r9, r11, ror #7
eor r12, r12, r7, lsr #10
eor r9, r9, r11, ror #18
add lr, lr, r12
eor r9, r9, r11, lsr #3
mov r12, r8, ror #17
add r9, r9, lr
ldr lr, [sp, #7*4]
str r9, [sp, #21*4]
eor r12, r12, r8, ror #19
mov r10, lr, ror #7
eor r12, r12, r8, lsr #10
eor r10, r10, lr, ror #18
add r11, r11, r12
eor r10, r10, lr, lsr #3
mov r12, r9, ror #17
add r11, r11, #0x00000100
add lr, lr, r4
add r10, r10, r11
eor r12, r12, r9, ror #19
str r10, [sp, #22*4]
add lr, lr, #0x11000000
eor r12, r12, r9, lsr #10
add lr, lr, r12
mov r12, r10, ror #17
add r4, lr, #0x00002000
eor r12, r12, r10, ror #19
str r4, [sp, #23*4]
add r5, r5, #0x80000000
eor r12, r12, r10, lsr #10
add r5, r5, r12
mov r12, r4, ror #17
str r5, [sp, #24*4]
eor r12, r12, r4, ror #19
mov r11, r5, ror #17
eor r12, r12, r4, lsr #10
eor r11, r11, r5, ror #19
add r6, r6, r12
eor r11, r11, r5, lsr #10
str r6, [sp, #25*4]
add r7, r7, r11
mov r12, r6, ror #17
str r7, [sp, #26*4]
eor r12, r12, r6, ror #19
mov r11, r7, ror #17
eor r12, r12, r6, lsr #10
eor r11, r11, r7, ror #19
add r8, r8, r12
eor r11, r11, r7, lsr #10
str r8, [sp, #27*4]
add r9, r9, r11
mov lr, r8, ror #17
mov r12, r9, ror #17
str r9, [sp, #28*4]
add r4, r4, #0x00400000
eor lr, lr, r8, ror #19
eor r12, r12, r9, ror #19
eor lr, lr, r8, lsr #10
eor r12, r12, r9, lsr #10
add r4, r4, #0x00000022
add r10, r10, lr
add r4, r4, r12
ldr r11, [sp, #16*4]
add r5, r5, #0x00000100
str r4, [sp, #30*4]
mov lr, r11, ror #7
str r10, [sp, #29*4]
mov r12, r10, ror #17
eor lr, lr, r11, ror #18
eor r12, r12, r10, ror #19
eor lr, lr, r11, lsr #3
eor r12, r12, r10, lsr #10
add r5, r5, lr
/* Same hand-off as the first pass: r11 = word 16, lr = word 17, r5 = word 31. */
ldr lr, [r1, #17*4]
add r5, r5, r12
b sha256d_ms_extend_loop2
/*
 * sha256d_ms_extend_coda2: end of the second-pass schedule extension.
 * Reached from the bne in sha256d_ms_extend_loop2 after the double round
 * with i = 42. Stores the pending word at index 59 and computes only word 60
 * (first half of the usual double round), since sha256d_ms_finish stops
 * after round 60. It then loads the eight words of sha256d_ms_h into r4-r11
 * as working variables; r2 is left pointing at that table and is read again
 * in sha256d_ms_finish.
 */
sha256d_ms_extend_coda2:
str r5, [r1, #(44+15)*4]
mov r12, r4, ror #17
add r11, r11, r6
mov r6, lr, ror #7
eor r12, r12, r4, ror #19
eor r6, r6, lr, ror #18
eor r12, r12, r4, lsr #10
eor r6, r6, lr, lsr #3
add r12, r12, r11
add r6, r6, r12
str r6, [r1, #(44+16)*4]
adr r2, sha256d_ms_h
ldmia r2, {r4-r11}
b sha256d_ms_main_loop2
/* SHA-256 initial hash value H0..H7. */
sha256d_ms_h:
.long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a
.long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
/*
 * sha256_main_round_red i, ka, rw, rd, re, rf, rg, rh
 *
 * Reduced compression round: only the "e-side" update is performed,
 *   \rh = \rh + \rd + ((\rf & \re) | (\rg & ~\re)) + W[i] + K[i]
 *         + (ror(\re,6) ^ ror(\re,11) ^ ror(\re,25))
 * The a-side update done by sha256_main_round is omitted, and \rd is only
 * read. Clobbers r3, r12 and lr.
 */
.macro sha256_main_round_red i, ka, rw, rd, re, rf, rg, rh
ldr r12, [\rw, #(\i)*4]
and r3, \rf, \re
bic lr, \rg, \re
add \rh, \rh, \rd
orr lr, lr, r3
ldr r3, \ka + (\i)*4
add \rh, \rh, lr
eor lr, \re, \re, ror #5
add \rh, \rh, r12
eor lr, lr, \re, ror #19
add \rh, \rh, r3
add \rh, \rh, lr, ror #6
.endm
/*
 * sha256d_ms_finish: tail of the second pass, entered after round 56.
 * Runs reduced rounds 57..60, adds the word at [r2 + 7*4] (r2 points at
 * sha256d_ms_h here, so this is its last entry) and stores the sum at
 * [r0 + 7*4]. No other output word is written. Then releases the 64-word
 * stack buffer and returns.
 */
sha256d_ms_finish:
sha256_main_round_red 57, sha256d_ms_k, r1, r6, r11, r8, r9, r10
sha256_main_round_red 58, sha256d_ms_k, r1, r5, r10, r11, r8, r9
sha256_main_round_red 59, sha256d_ms_k, r1, r4, r9, r10, r11, r8
/* r5 is free after round 58; fetch the final addend early. */
ldr r5, [r2, #7*4]
sha256_main_round_red 60, sha256d_ms_k, r1, r7, r8, r9, r10, r11
add r11, r11, r5
str r11, [r0, #7*4]
add sp, sp, #64*4
#ifdef __thumb__
ldmfd sp!, {r4-r11, lr}
bx lr
#else
ldmfd sp!, {r4-r11, pc}
#endif
#ifdef __ARM_NEON__
/*
 * sha256_init_4way (also exported as _sha256_init_4way)
 *
 * r0 - destination for a 4-way interleaved SHA-256 state (8 x 4 words).
 * Copies the 128-byte table sha256_4h to [r0] through q8-q15. r12 is used
 * as scratch; nothing is saved on the stack.
 */
.text
.code 32
.align 2
.globl sha256_init_4way
.globl _sha256_init_4way
#ifdef __ELF__
.type sha256_init_4way, %function
#endif
sha256_init_4way:
_sha256_init_4way:
adr r12, sha256_4h
vldmia r12, {q8-q15}
vstmia r0, {q8-q15}
bx lr
.align 4
/* SHA-256 initial hash value H0..H7, each word repeated once per lane. */
sha256_4h:
.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
.long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
.long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
.long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
.long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
.long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
/*
 * sha256_4k: emit the 64 SHA-256 round constants with each constant repeated
 * four times (one copy per lane), i.e. 64 rows of 16 bytes. The 4-way round
 * macro reads one row per round with a post-incremented vld1.
 */
.macro sha256_4k
.long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
.long 0x71374491, 0x71374491, 0x71374491, 0x71374491
.long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf
.long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5
.long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b
.long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1
.long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4
.long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5
.long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98
.long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01
.long 0x243185be, 0x243185be, 0x243185be, 0x243185be
.long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3
.long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74
.long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe
.long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7
.long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174
.long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1
.long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786
.long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6
.long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc
.long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f
.long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa
.long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc
.long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da
.long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152
.long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d
.long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8
.long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7
.long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3
.long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147
.long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351
.long 0x14292967, 0x14292967, 0x14292967, 0x14292967
.long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85
.long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138
.long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc
.long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13
.long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354
.long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb
.long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e
.long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85
.long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1
.long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b
.long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70
.long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3
.long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819
.long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624
.long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585
.long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070
.long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116
.long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08
.long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c
.long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5
.long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3
.long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a
.long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f
.long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3
.long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee
.long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f
.long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814
.long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208
.long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa
.long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb
.long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
.long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
.endm
/*
 * sha256_4way_extend_doubleround_core i, rr, rw, ra, rb, ry, rz
 *
 * 4-lane counterpart of the scalar schedule double round. Each rotation is
 * built from a vshr/vshl pair combined with veor. The \i argument is not
 * referenced in the body: words are read through \rr and written through
 * \rw, both post-incremented.
 *
 * On entry the wrapper has already started the \ry term: q4 = \ry >> 17 and
 * q0 = \ry << (32-17). q5 and q6 hold the two oldest schedule words in use.
 * With s0(x) = ror7 ^ ror18 ^ shr3 and s1(x) = ror17 ^ ror19 ^ shr10:
 *   \ra = s0(q6) + s1(\ry) + q5 + \ra      (stored through \rw)
 *   \rb = s0(next word read through \rr)   (unfinished)
 *   q1  = q6 + (incoming \rb) + s1(\rz)    (wrapper adds this to \rb)
 * q5 is reloaded with the next word; q0, q1 and q4 are scratch.
 */
.macro sha256_4way_extend_doubleround_core i, rr, rw, ra, rb, ry, rz
vadd.u32 q5, q5, \ra
veor.u32 q4, q4, q0
vshr.u32 q0, \ry, #19
vshl.u32 q1, \ry, #32-19
veor.u32 q4, q4, q0
vshr.u32 \ra, q6, #7
vshl.u32 q0, q6, #32-7
veor.u32 q4, q4, q1
veor.u32 \ra, \ra, q0
vshr.u32 q1, \ry, #10
vshr.u32 q0, q6, #18
veor.u32 q4, q4, q1
veor.u32 \ra, \ra, q0
vshl.u32 q1, q6, #32-18
vshr.u32 q0, q6, #3
veor.u32 \ra, \ra, q1
vadd.u32 q4, q4, q5
veor.u32 \ra, \ra, q0
/* q5 has been consumed; fetch the next input word for the second half. */
vld1.u32 {q5}, [\rr]!
vadd.u32 \ra, \ra, q4
vshr.u32 q4, \rz, #17
vshl.u32 q0, \rz, #32-17
vadd.u32 q6, q6, \rb
/* First new schedule word is complete: write it out. */
vst1.u32 {\ra}, [\rw]!
veor.u32 q4, q4, q0
vshr.u32 q0, \rz, #19
vshl.u32 q1, \rz, #32-19
veor.u32 q4, q4, q0
vshr.u32 \rb, q5, #7
veor.u32 q4, q4, q1
vshl.u32 q0, q5, #32-7
vshr.u32 q1, \rz, #10
veor.u32 \rb, \rb, q0
vshr.u32 q0, q5, #18
veor.u32 q4, q4, q1
veor.u32 \rb, \rb, q0
vshl.u32 q1, q5, #32-18
vshr.u32 q0, q5, #3
veor.u32 \rb, \rb, q1
/* q1 carries the additive part of the second word out to the wrapper. */
vadd.u32 q1, q6, q4
veor.u32 \rb, \rb, q0
.endm
/*
 * sha256_4way_extend_doubleround_head: first double round of a 4-way
 * extension chain. Loads q6 through \rr, starts the \ry rotation in q4/q0
 * as the core expects, runs the core, then reloads q6 for the next round and
 * completes \rb by adding q1.
 */
.macro sha256_4way_extend_doubleround_head i, rr, rw, ra, rb, ry, rz
vld1.u32 {q6}, [\rr]!
vshr.u32 q4, \ry, #17
vshl.u32 q0, \ry, #32-17
sha256_4way_extend_doubleround_core \i, \rr, \rw, \ra, \rb, \ry, \rz
vld1.u32 {q6}, [\rr]!
vadd.u32 \rb, \rb, q1
.endm
/*
 * sha256_4way_extend_doubleround_body: middle double round of a 4-way
 * extension chain. Starts the \ry rotation in q4/q0, writes out \rz (in the
 * invocation sequences, the register the previous round passed as \rb and
 * left unstored), runs the core, then reloads q6 for the next round and
 * completes \rb by adding q1.
 */
.macro sha256_4way_extend_doubleround_body i, rr, rw, ra, rb, ry, rz
vshr.u32 q4, \ry, #17
vshl.u32 q0, \ry, #32-17
vst1.u32 {\rz}, [\rw]!
sha256_4way_extend_doubleround_core \i, \rr, \rw, \ra, \rb, \ry, \rz
vld1.u32 {q6}, [\rr]!
vadd.u32 \rb, \rb, q1
.endm
/*
 * sha256_4way_extend_doubleround_foot: last double round of a 4-way
 * extension chain. Same as the body variant, except that no further word is
 * loaded into q6 and the completed \rb is written out immediately, because
 * no later round will store it.
 */
.macro sha256_4way_extend_doubleround_foot i, rr, rw, ra, rb, ry, rz
vshr.u32 q4, \ry, #17
vshl.u32 q0, \ry, #32-17
vst1.u32 {\rz}, [\rw]!
sha256_4way_extend_doubleround_core \i, \rr, \rw, \ra, \rb, \ry, \rz
vadd.u32 \rb, \rb, q1
vst1.u32 {\rb}, [\rw]!
.endm
/*
 * sha256_4way_main_round i, rk, rw, ra, rb, rc, rd, re, rf, rg, rh
 *
 * One SHA-256 compression round on four lanes. \ra..\rh are the q registers
 * holding the working variables a..h. The schedule word is read through \rw
 * and the constant row through \rk, both post-incremented; \i is not
 * referenced in the body, so the round index is implied by the pointers.
 *   t   = \rh + ((\rf & \re) | (\rg & ~\re)) + W + K
 *         + (ror(\re,6) ^ ror(\re,11) ^ ror(\re,25))
 *   \rh = t + \rd
 *   \rd = t + ((\ra & \rb) ^ ((\ra ^ \rb) & \rc))
 *         + (ror(\ra,2) ^ ror(\ra,13) ^ ror(\ra,22))
 * Rotations are assembled from vshr/vshl pairs. For the final rotate of each
 * sigma term the two halves are combined with vadd rather than veor/vorr;
 * the halves have no bits in common, so the sum equals the rotation.
 * Clobbers q8-q13.
 */
.macro sha256_4way_main_round i, rk, rw, ra, rb, rc, rd, re, rf, rg, rh
vld1.u32 {q8}, [\rw]!
vand.u32 q9, \rf, \re
vbic.u32 q10, \rg, \re
vshr.u32 q11, \re, #5
vorr.u32 q10, q10, q9
vld1.u32 {q9}, [\rk]!
vadd.u32 \rh, \rh, q10
vshl.u32 q12, \re, #32-5
/* q10 = e ^ ror(e,5) ^ ror(e,19), to be rotated right by 6 below. */
veor.u32 q10, \re, q11
vshr.u32 q11, \re, #19
veor.u32 q10, q10, q12
vshl.u32 q12, \re, #32-19
veor.u32 q10, q10, q11
vadd.u32 \rh, \rh, q8
veor.u32 q10, q10, q12
vadd.u32 \rh, \rh, q9
veor.u32 q9, \ra, \rb
vshr.u32 q11, q10, #6
vshl.u32 q13, q10, #32-6
vadd.u32 \rh, \rh, q11
vshr.u32 q11, \ra, #11
vshl.u32 q12, \ra, #32-11
/* q8 = a ^ ror(a,11) ^ ror(a,20), to be rotated right by 2 below. */
veor.u32 q8, \ra, q11
vand.u32 q10, \ra, \rb
veor.u32 q8, q8, q12
vshr.u32 q11, \ra, #20
vshl.u32 q12, \ra, #32-20
veor.u32 q8, q8, q11
vand.u32 q9, q9, \rc
veor.u32 q8, q8, q12
vadd.u32 \rh, \rh, q13
veor.u32 q10, q10, q9
vshr.u32 q11, q8, #2
vshl.u32 q12, q8, #32-2
vadd.u32 q9, \rh, q10
vadd.u32 q12, q12, q11
vadd.u32 \rh, \rh, \rd
vadd.u32 \rd, q9, q12
.endm
/*
 * sha256_4way_main_quadround i, rk, rw: four consecutive 4-lane rounds with
 * the working variables in q0-q7. The register-to-variable assignment
 * rotates by one position per round and is back to the initial one after
 * four rounds.
 */
.macro sha256_4way_main_quadround i, rk, rw
sha256_4way_main_round \i+0, \rk, \rw, q0, q1, q2, q3, q4, q5, q6, q7
sha256_4way_main_round \i+1, \rk, \rw, q3, q0, q1, q2, q7, q4, q5, q6
sha256_4way_main_round \i+2, \rk, \rw, q2, q3, q0, q1, q6, q7, q4, q5
sha256_4way_main_round \i+3, \rk, \rw, q1, q2, q3, q0, q5, q6, q7, q4
.endm
/*
 * sha256_transform_4way (also exported as _sha256_transform_4way)
 *
 * Register usage as seen in the code:
 *   r0 - pointer to the 4-way interleaved state (8 rows of 4 words); read,
 *        then updated in place
 *   r1 - pointer to the 4-way interleaved input block (16 rows of 4 words)
 *   r2 - if non-zero, every input word is byte-swapped (vrev32.8)
 *
 * A 64-row schedule is built in a 64-byte-aligned stack buffer. The original
 * sp is kept in r12 and restored from it at the end; this matters because
 * the round macros use sp itself as the post-incremented schedule pointer.
 * r4, lr and q4-q7 are saved and restored.
 */
.text
.code 32
.align 2
.globl sha256_transform_4way
.globl _sha256_transform_4way
#ifdef __ELF__
.type sha256_transform_4way, %function
#endif
sha256_transform_4way:
_sha256_transform_4way:
stmfd sp!, {r4, lr}
vpush {q4-q7}
/* Remember sp, then carve out 64 rows of 16 bytes, aligned down to 64. */
mov r12, sp
sub sp, sp, #64*16
bic sp, sp, #63
cmp r2, #0
bne sha256_transform_4way_swap
/* No swap: copy the 16 input rows; they also stay live in q0-q15. */
vldmia r1!, {q0-q7}
vstmia sp, {q0-q7}
add r3, sp, #8*16
vldmia r1, {q8-q15}
vstmia r3, {q8-q15}
b sha256_transform_4way_extend
sha256_transform_4way_swap:
/* Swap: same copy with the bytes of every 32-bit word reversed. */
vldmia r1!, {q0-q7}
vrev32.8 q0, q0
vrev32.8 q1, q1
vrev32.8 q2, q2
vrev32.8 q3, q3
vldmia r1, {q8-q15}
vrev32.8 q4, q4
vrev32.8 q5, q5
vrev32.8 q6, q6
vrev32.8 q7, q7
vstmia sp, {q0-q7}
vrev32.8 q8, q8
vrev32.8 q9, q9
vrev32.8 q10, q10
vrev32.8 q11, q11
vrev32.8 q12, q12
vrev32.8 q13, q13
vrev32.8 q14, q14
vrev32.8 q15, q15
add r3, sp, #8*16
vstmia r3, {q8-q15}
sha256_transform_4way_extend:
/*
 * Schedule extension: r1 reads from row 1 onwards, r2 writes from row 16
 * onwards, q5 starts as row 0 and q9-q15 still hold rows 9..15.
 */
add r1, sp, #1*16
add r2, sp, #16*16
vmov.u32 q5, q0
sha256_4way_extend_doubleround_head 0, r1, r2, q9, q10, q14, q15
sha256_4way_extend_doubleround_body 2, r1, r2, q11, q12, q9, q10
sha256_4way_extend_doubleround_body 4, r1, r2, q13, q14, q11, q12
sha256_4way_extend_doubleround_body 6, r1, r2, q15, q9, q13, q14
sha256_4way_extend_doubleround_body 8, r1, r2, q10, q11, q15, q9
sha256_4way_extend_doubleround_body 10, r1, r2, q12, q13, q10, q11
sha256_4way_extend_doubleround_body 12, r1, r2, q14, q15, q12, q13
sha256_4way_extend_doubleround_body 14, r1, r2, q9, q10, q14, q15
sha256_4way_extend_doubleround_body 16, r1, r2, q11, q12, q9, q10
sha256_4way_extend_doubleround_body 18, r1, r2, q13, q14, q11, q12
sha256_4way_extend_doubleround_body 20, r1, r2, q15, q9, q13, q14
sha256_4way_extend_doubleround_body 22, r1, r2, q10, q11, q15, q9
sha256_4way_extend_doubleround_body 24, r1, r2, q12, q13, q10, q11
sha256_4way_extend_doubleround_body 26, r1, r2, q14, q15, q12, q13
sha256_4way_extend_doubleround_body 28, r1, r2, q9, q10, q14, q15
sha256_4way_extend_doubleround_body 30, r1, r2, q11, q12, q9, q10
sha256_4way_extend_doubleround_body 32, r1, r2, q13, q14, q11, q12
sha256_4way_extend_doubleround_body 34, r1, r2, q15, q9, q13, q14
sha256_4way_extend_doubleround_body 36, r1, r2, q10, q11, q15, q9
sha256_4way_extend_doubleround_body 38, r1, r2, q12, q13, q10, q11
sha256_4way_extend_doubleround_body 40, r1, r2, q14, q15, q12, q13
sha256_4way_extend_doubleround_body 42, r1, r2, q9, q10, q14, q15
sha256_4way_extend_doubleround_body 44, r1, r2, q11, q12, q9, q10
sha256_4way_extend_doubleround_foot 46, r1, r2, q13, q14, q11, q12
/* Load the state into q0-q7 and point r4 at the constant rows. */
vldmia r0, {q0-q7}
adr r4, sha256_transform_4way_4k
b sha256_transform_4way_4k_over
.align 4
sha256_transform_4way_4k:
sha256_4k
sha256_transform_4way_4k_over:
/* 64 rounds; each one advances both r4 and sp by one 16-byte row. */
sha256_4way_main_quadround 0, r4, sp
sha256_4way_main_quadround 4, r4, sp
sha256_4way_main_quadround 8, r4, sp
sha256_4way_main_quadround 12, r4, sp
sha256_4way_main_quadround 16, r4, sp
sha256_4way_main_quadround 20, r4, sp
sha256_4way_main_quadround 24, r4, sp
sha256_4way_main_quadround 28, r4, sp
sha256_4way_main_quadround 32, r4, sp
sha256_4way_main_quadround 36, r4, sp
sha256_4way_main_quadround 40, r4, sp
sha256_4way_main_quadround 44, r4, sp
sha256_4way_main_quadround 48, r4, sp
sha256_4way_main_quadround 52, r4, sp
sha256_4way_main_quadround 56, r4, sp
sha256_4way_main_quadround 60, r4, sp
/* Add the previous state to the working variables and write it back. */
vldmia r0, {q8-q15}
vadd.u32 q0, q0, q8
vadd.u32 q1, q1, q9
vadd.u32 q2, q2, q10
vadd.u32 q3, q3, q11
vadd.u32 q4, q4, q12
vadd.u32 q5, q5, q13
vadd.u32 q6, q6, q14
vadd.u32 q7, q7, q15
vstmia r0, {q0-q7}
/* Restore the stack pointer saved on entry, then the saved registers. */
mov sp, r12
vpop {q4-q7}
ldmfd sp!, {r4, pc}
/*
 * sha256d_ms_4way (ARM NEON): entry point, prologue and first part of the
 * message-schedule update.  Every q register holds four 32-bit lanes.
 *
 * Register use visible in this routine:
 *   r0 - output base; only the vector at offset 7*16 is written (at exit)
 *   r1 - base of a table of 16-byte vectors; entries 18..30 are rewritten
 *        in this part
 *   r2 - eight vectors added to q0-q7 after the first set of rounds
 *   r3 - eight vectors loaded into q0-q7 before the first set of rounds
 * NOTE(review): presumably these are (hash, data/W, midstate, prehash) in
 * the C prototype -- confirm against the caller.
 *
 * Notation used below:
 *   sigma0(x) = rotr(x,7)  ^ rotr(x,18) ^ (x >> 3)
 *   sigma1(x) = rotr(x,17) ^ rotr(x,19) ^ (x >> 10)
 * Each rotate is built from a vshr/vshl pair whose results are XORed.
 */
.text
.code 32
.align 2
.globl sha256d_ms_4way
.globl _sha256d_ms_4way
#ifdef __ELF__
.type sha256d_ms_4way, %function
#endif
sha256d_ms_4way:
_sha256d_ms_4way:
/* Save r4, lr and q4-q7; r12 keeps the incoming sp for the epilogue. */
stmfd sp!, {r4, lr}
vpush {q4-q7}
mov r12, sp
/* Reserve 64 vectors (64*16 bytes) on the stack, aligned down to 64 bytes. */
sub sp, sp, #64*16
bic sp, sp, #63
/* q6 = entry 3 of the r1 table; r1 is advanced to entry 18. */
add r4, r1, #3*16
vld1.u32 {q6}, [r4]!
add r1, r1, #18*16
vldmia r1, {q11-q13}
/*
 * Comparing r0 with itself always sets Z.
 * NOTE(review): presumably this is the "first pass" marker tested by the
 * bne instructions later in the routine -- confirm that the macros used in
 * between do not alter the flags.
 */
cmp r0, r0
vshr.u32 q10, q6, #7
vshl.u32 q0, q6, #32-7
vshr.u32 q1, q6, #18
veor.u32 q10, q10, q0
vshl.u32 q0, q6, #32-18
veor.u32 q10, q10, q1
vshr.u32 q1, q6, #3
veor.u32 q10, q10, q0
/* Save the original entries 18..20 in the first three stack slots. */
vstmia sp!, {q11-q13}
/* q4 = sigma0(entry 3) */
veor.u32 q4, q10, q1
/* entry 19 += entry 3; entry 18 += sigma0(entry 3) */
vadd.u32 q12, q12, q6
vadd.u32 q11, q11, q4
vshr.u32 q14, q12, #17
vshr.u32 q4, q11, #17
vshl.u32 q0, q11, #32-17
vst1.u32 {q11}, [r1]!
veor.u32 q4, q4, q0
vshr.u32 q0, q11, #19
vshl.u32 q1, q11, #32-19
veor.u32 q4, q4, q0
vst1.u32 {q12}, [r1]!
veor.u32 q4, q4, q1
vshr.u32 q1, q11, #10
vshl.u32 q0, q12, #32-17
veor.u32 q4, q4, q1
veor.u32 q14, q14, q0
/* entry 20 += sigma1(new entry 18) */
vadd.u32 q13, q13, q4
vshr.u32 q0, q12, #19
vshl.u32 q1, q12, #32-19
veor.u32 q14, q14, q0
vst1.u32 {q13}, [r1]!
veor.u32 q14, q14, q1
vshr.u32 q1, q12, #10
vshr.u32 q4, q13, #17
vshl.u32 q0, q13, #32-17
/* q14 = sigma1(new entry 19); stored below as entry 21 */
veor.u32 q14, q14, q1
veor.u32 q4, q4, q0
vshr.u32 q0, q13, #19
vshl.u32 q1, q13, #32-19
veor.u32 q4, q4, q0
vst1.u32 {q14}, [r1]!
veor.u32 q4, q4, q1
vshr.u32 q1, q13, #10
/* Load entry 22, save the original on the stack, then add sigma1(entry 20). */
vld1.u32 {q15}, [r1]
veor.u32 q4, q4, q1
vst1.u32 {q15}, [sp]!
vadd.u32 q15, q15, q4
vshr.u32 q4, q14, #17
vshl.u32 q0, q14, #32-17
vshl.u32 q1, q14, #32-19
veor.u32 q4, q4, q0
vshr.u32 q0, q14, #19
vst1.u32 {q15}, [r1]!
veor.u32 q4, q4, q0
/* Load entry 23, save the original, then add sigma1(entry 21). */
vld1.u32 {q9}, [r1]
veor.u32 q4, q4, q1
vshr.u32 q1, q14, #10
vst1.u32 {q9}, [sp]!
veor.u32 q5, q4, q1
vshr.u32 q4, q15, #17
vadd.u32 q9, q9, q5
vshl.u32 q0, q15, #32-17
vshl.u32 q1, q15, #32-19
veor.u32 q4, q4, q0
vshr.u32 q0, q15, #19
vst1.u32 {q9}, [r1]!
veor.u32 q4, q4, q0
/* Load entry 24, save the original, then add sigma1(entry 22). */
vld1.u32 {q10}, [r1]
veor.u32 q4, q4, q1
vshr.u32 q1, q15, #10
vst1.u32 {q10}, [sp]!
veor.u32 q4, q4, q1
vshl.u32 q0, q9, #32-17
vadd.u32 q10, q10, q4
vshr.u32 q4, q9, #17
vshl.u32 q1, q9, #32-19
veor.u32 q4, q4, q0
vshr.u32 q0, q9, #19
veor.u32 q4, q4, q1
vshr.u32 q1, q9, #10
veor.u32 q4, q4, q0
vst1.u32 {q10}, [r1]!
veor.u32 q5, q4, q1
vshr.u32 q4, q10, #17
vshl.u32 q0, q10, #32-17
/* entry 25 = entry 18 + sigma1(entry 23) */
vadd.u32 q11, q11, q5
veor.u32 q4, q4, q0
vshr.u32 q0, q10, #19
vshl.u32 q1, q10, #32-19
veor.u32 q4, q4, q0
vst1.u32 {q11}, [r1]!
veor.u32 q4, q4, q1
vshr.u32 q1, q10, #10
vshl.u32 q0, q11, #32-17
veor.u32 q2, q4, q1
vshr.u32 q4, q11, #17
/* entry 26 = entry 19 + sigma1(entry 24) */
vadd.u32 q12, q12, q2
vshl.u32 q1, q11, #32-19
veor.u32 q4, q4, q0
vshr.u32 q0, q11, #19
veor.u32 q4, q4, q1
vshr.u32 q1, q11, #10
veor.u32 q4, q4, q0
vst1.u32 {q12}, [r1]!
veor.u32 q5, q4, q1
vshr.u32 q4, q12, #17
vshl.u32 q0, q12, #32-17
/* entry 27 = entry 20 + sigma1(entry 25) */
vadd.u32 q13, q13, q5
veor.u32 q4, q4, q0
vshr.u32 q0, q12, #19
vshl.u32 q1, q12, #32-19
veor.u32 q4, q4, q0
vst1.u32 {q13}, [r1]!
veor.u32 q4, q4, q1
vshr.u32 q1, q12, #10
vshl.u32 q0, q13, #32-17
veor.u32 q2, q4, q1
vshr.u32 q4, q13, #17
/* entry 28 = entry 21 + sigma1(entry 26) */
vadd.u32 q14, q14, q2
vshl.u32 q1, q13, #32-19
veor.u32 q4, q4, q0
vshr.u32 q0, q13, #19
veor.u32 q4, q4, q1
vshr.u32 q1, q13, #10
veor.u32 q4, q4, q0
vst1.u32 {q14}, [r1]!
veor.u32 q5, q4, q1
/* r4 was at entry 4 after the first load; move it on to entry 16. */
add r4, r4, #12*16
vshr.u32 q4, q14, #17
vshl.u32 q0, q14, #32-17
/* entry 29 = entry 22 + sigma1(entry 27) */
vadd.u32 q15, q15, q5
veor.u32 q4, q4, q0
vshr.u32 q0, q14, #19
vshl.u32 q1, q14, #32-19
veor.u32 q4, q4, q0
vst1.u32 {q15}, [r1]!
veor.u32 q4, q4, q1
vshr.u32 q1, q14, #10
/* Load entry 30, save the original; new entry 30 = entry 23 + sigma1(entry 28) + original. */
vld1.u32 {q2}, [r1]
veor.u32 q4, q4, q1
vshl.u32 q0, q15, #32-17
vadd.u32 q9, q9, q4
vst1.u32 {q2}, [sp]!
vadd.u32 q9, q9, q2
vshr.u32 q4, q15, #17
vshr.u32 q2, q15, #19
veor.u32 q4, q4, q0
vst1.u32 {q9}, [r1]!
vshl.u32 q1, q15, #32-19
veor.u32 q4, q4, q2
vshr.u32 q0, q15, #10
veor.u32 q4, q4, q1
/* q5, q6 = entries 16 and 17; r4 ends at entry 18. */
vld1.u32 {q5-q6}, [r4]!
veor.u32 q4, q4, q0
/* Load entry 31, save the original; q10 = entry 24 + sigma1(entry 29) + original (not stored here). */
vld1.u32 {q2}, [r1]
vadd.u32 q10, q10, q4
vst1.u32 {q2}, [sp]!
vadd.u32 q10, q10, q2
/* Eight vectors were pushed with post-increment; rewind sp to the first one. */
sub sp, sp, #8*16
/*
 * Shared tail of the message-schedule extension, entered from both passes
 * of sha256d_ms_4way (fall-through on the first, "b" on the second).
 * The sha256_4way_extend_doubleround_body/_foot macros are defined outside
 * this chunk; they are invoked for i = 16, 18, ..., 46 with r4 and r1 as
 * the two pointer arguments and a rotating set of q9-q15.
 */
sha256d_ms_4way_extend_loop2:
sha256_4way_extend_doubleround_body 16, r4, r1, q11, q12, q9, q10
sha256_4way_extend_doubleround_body 18, r4, r1, q13, q14, q11, q12
sha256_4way_extend_doubleround_body 20, r4, r1, q15, q9, q13, q14
sha256_4way_extend_doubleround_body 22, r4, r1, q10, q11, q15, q9
sha256_4way_extend_doubleround_body 24, r4, r1, q12, q13, q10, q11
sha256_4way_extend_doubleround_body 26, r4, r1, q14, q15, q12, q13
sha256_4way_extend_doubleround_body 28, r4, r1, q9, q10, q14, q15
sha256_4way_extend_doubleround_body 30, r4, r1, q11, q12, q9, q10
sha256_4way_extend_doubleround_body 32, r4, r1, q13, q14, q11, q12
sha256_4way_extend_doubleround_body 34, r4, r1, q15, q9, q13, q14
sha256_4way_extend_doubleround_body 36, r4, r1, q10, q11, q15, q9
sha256_4way_extend_doubleround_body 38, r4, r1, q12, q13, q10, q11
sha256_4way_extend_doubleround_body 40, r4, r1, q14, q15, q12, q13
sha256_4way_extend_doubleround_body 42, r4, r1, q9, q10, q14, q15
sha256_4way_extend_doubleround_body 44, r4, r1, q11, q12, q9, q10
sha256_4way_extend_doubleround_foot 46, r4, r1, q13, q14, q11, q12
/*
 * Branch on Z clear.
 * NOTE(review): presumably Z is still set from "cmp r0, r0" on the first
 * pass and cleared by the "adds" on the second pass -- confirm.
 */
bne sha256d_ms_4way_extend_coda2
/*
 * First pass: load eight vectors from r3.  The first four go to q4-q7 and
 * the next four to q0-q3, then q0 and q4 are exchanged.
 */
vldmia r3!, {q4-q7}
vldmia r3, {q0-q3}
vswp q0, q4
/* r3 = fourth 16-byte entry of the constant table below. */
adr r3, sha256d_ms_4way_4k+3*16
/* Step r1 back by 61 vectors before entering the round loop at round 3. */
sub r1, r1, #(64-3)*16
b sha256d_ms_4way_main_loop1
.align 4
/*
 * Constant table addressed through r3; its contents come from the sha256_4k
 * macro, which is defined outside this chunk.
 */
sha256d_ms_4way_4k:
sha256_4k
/*
 * Main compression rounds of sha256d_ms_4way, followed (first pass only) by
 * the feed-forward addition, restoration of the caller's table, padding
 * setup and the head of the second message schedule.
 *
 * The second pass enters at sha256d_ms_4way_main_loop2 (round 0); the first
 * pass enters at sha256d_ms_4way_main_loop1 (round 3).  The
 * sha256_4way_main_round/_quadround macros are defined outside this chunk;
 * they take r3 and r1 as pointer arguments and q0-q7 as working state.
 *
 * Notation:
 *   sigma0(x) = rotr(x,7)  ^ rotr(x,18) ^ (x >> 3)
 *   sigma1(x) = rotr(x,17) ^ rotr(x,19) ^ (x >> 10)
 *   "slot n"  = the n-th 16-byte vector counted from sp
 */
sha256d_ms_4way_main_loop2:
sha256_4way_main_round 0, r3, r1, q0, q1, q2, q3, q4, q5, q6, q7
sha256_4way_main_round 1, r3, r1, q3, q0, q1, q2, q7, q4, q5, q6
sha256_4way_main_round 2, r3, r1, q2, q3, q0, q1, q6, q7, q4, q5
sha256d_ms_4way_main_loop1:
sha256_4way_main_round 3, r3, r1, q1, q2, q3, q0, q5, q6, q7, q4
sha256_4way_main_quadround 4, r3, r1
sha256_4way_main_quadround 8, r3, r1
sha256_4way_main_quadround 12, r3, r1
sha256_4way_main_quadround 16, r3, r1
sha256_4way_main_quadround 20, r3, r1
sha256_4way_main_quadround 24, r3, r1
sha256_4way_main_quadround 28, r3, r1
sha256_4way_main_quadround 32, r3, r1
sha256_4way_main_quadround 36, r3, r1
sha256_4way_main_quadround 40, r3, r1
sha256_4way_main_quadround 44, r3, r1
sha256_4way_main_quadround 48, r3, r1
sha256_4way_main_quadround 52, r3, r1
sha256_4way_main_round 56, r3, r1, q0, q1, q2, q3, q4, q5, q6, q7
/*
 * Branch on Z clear to the shortened final rounds.
 * NOTE(review): presumably taken only on the second pass -- confirm that
 * the round macros leave the flags untouched.
 */
bne sha256d_ms_4way_finish
sha256_4way_main_round 57, r3, r1, q3, q0, q1, q2, q7, q4, q5, q6
sha256_4way_main_round 58, r3, r1, q2, q3, q0, q1, q6, q7, q4, q5
sha256_4way_main_round 59, r3, r1, q1, q2, q3, q0, q5, q6, q7, q4
sha256_4way_main_quadround 60, r3, r1
/* Add the eight vectors at r2 to the working state q0-q7. */
vldmia r2, {q8-q15}
vadd.u32 q0, q0, q8
vadd.u32 q1, q1, q9
vadd.u32 q2, q2, q10
vadd.u32 q3, q3, q11
vadd.u32 q4, q4, q12
vadd.u32 q5, q5, q13
vadd.u32 q6, q6, q14
vadd.u32 q7, q7, q15
/*
 * Reload the eight vectors saved in slots 0..7 and write them back through
 * r1: three at r1, three at +4*16, two at a further +8*16.
 * NOTE(review): presumably these land on table entries 18-20, 22-24 and
 * 30-31 (the originals saved at entry) -- this relies on r1 having been
 * advanced by the round macros; confirm.
 */
vldmia sp, {q8-q15}
sub r1, r1, #(64-18)*16
vstmia r1, {q8-q10}
add r1, r1, #4*16
vstmia r1, {q11-q13}
add r1, r1, #8*16
vstmia r1, {q14-q15}
/* Slots 0..7 now hold the summed state q0-q7. */
vstmia sp, {q0-q7}
/*
 * Slots 8..15: 0x80000000, six zero vectors, then 0x00000100.
 * NOTE(review): presumably SHA-256 padding for a 256-bit message -- confirm.
 */
vmov.u32 q8, #0x80000000
vmov.u32 q9, #0
vmov.u32 q10, #0
vmov.u32 q11, #0
vmov.u32 q12, #0
vmov.u32 q13, #0
vmov.u32 q14, #0
vmov.u32 q15, #0x00000100
add r1, sp, #8*16
vstmia r1!, {q8-q15}
/*
 * r4 = slot 2.  "adds" also updates the flags.
 * NOTE(review): presumably used to clear Z so that the later bne
 * instructions take the second-pass path -- confirm.
 */
adds r4, sp, #2*16
vshr.u32 q9, q1, #7
vshl.u32 q2, q1, #32-7
vshr.u32 q4, q1, #18
veor.u32 q9, q9, q2
vshl.u32 q3, q1, #32-18
veor.u32 q9, q9, q4
vshr.u32 q2, q1, #3
veor.u32 q9, q9, q3
vld1.u32 {q5}, [r4]!
veor.u32 q9, q9, q2
vmov.u32 q7, #0x00a00000
/* slot 16 = sigma0(q1) + q0 */
vadd.u32 q9, q9, q0
vshr.u32 q10, q5, #7
vshl.u32 q0, q5, #32-7
vshl.u32 q3, q5, #32-18
veor.u32 q10, q10, q0
vshr.u32 q0, q5, #18
veor.u32 q10, q10, q3
vst1.u32 {q9}, [r1]!
vadd.u32 q3, q1, q7
veor.u32 q10, q10, q0
vshr.u32 q0, q5, #3
vld1.u32 {q6}, [r4]!
veor.u32 q10, q10, q0
vshr.u32 q4, q9, #17
vshl.u32 q0, q9, #32-17
/* slot 17 = sigma0(slot 2) + q1 + 0x00a00000 */
vadd.u32 q10, q10, q3
veor.u32 q4, q4, q0
vshr.u32 q0, q9, #19
vshl.u32 q1, q9, #32-19
veor.u32 q4, q4, q0
vshr.u32 q11, q6, #7
vshl.u32 q0, q6, #32-7
veor.u32 q4, q4, q1
veor.u32 q11, q11, q0
vshr.u32 q1, q9, #10
vshr.u32 q0, q6, #18
veor.u32 q4, q4, q1
veor.u32 q11, q11, q0
vshl.u32 q1, q6, #32-18
vshr.u32 q0, q6, #3
veor.u32 q11, q11, q1
vadd.u32 q4, q4, q5
veor.u32 q11, q11, q0
vld1.u32 {q5}, [r4]!
/* slot 18 = sigma0(slot 3) + sigma1(slot 16) + slot 2 */
vadd.u32 q11, q11, q4
vshr.u32 q4, q10, #17
vshl.u32 q0, q10, #32-17
vst1.u32 {q10}, [r1]!
veor.u32 q4, q4, q0
vshr.u32 q0, q10, #19
vshl.u32 q1, q10, #32-19
veor.u32 q4, q4, q0
vshr.u32 q12, q5, #7
veor.u32 q4, q4, q1
vshl.u32 q0, q5, #32-7
vshr.u32 q1, q10, #10
veor.u32 q12, q12, q0
vshr.u32 q0, q5, #18
veor.u32 q4, q4, q1
veor.u32 q12, q12, q0
vshl.u32 q1, q5, #32-18
vst1.u32 {q11}, [r1]!
veor.u32 q12, q12, q1
vshr.u32 q0, q5, #3
vadd.u32 q1, q6, q4
veor.u32 q12, q12, q0
vshr.u32 q4, q11, #17
vshl.u32 q0, q11, #32-17
/* slot 19 = sigma0(slot 4) + slot 3 + sigma1(slot 17) */
vadd.u32 q12, q12, q1
vld1.u32 {q6}, [r4]!
veor.u32 q4, q4, q0
vshr.u32 q0, q11, #19
vshl.u32 q1, q11, #32-19
veor.u32 q4, q4, q0
vshr.u32 q13, q6, #7
vshl.u32 q0, q6, #32-7
veor.u32 q4, q4, q1
veor.u32 q13, q13, q0
vshr.u32 q1, q11, #10
vshr.u32 q0, q6, #18
veor.u32 q4, q4, q1
veor.u32 q13, q13, q0
vshl.u32 q1, q6, #32-18
vshr.u32 q0, q6, #3
veor.u32 q13, q13, q1
vadd.u32 q4, q4, q5
veor.u32 q13, q13, q0
vld1.u32 {q5}, [r4]!
/* slot 20 = sigma0(slot 5) + sigma1(slot 18) + slot 4 */
vadd.u32 q13, q13, q4
vshr.u32 q4, q12, #17
vshl.u32 q0, q12, #32-17
vst1.u32 {q12}, [r1]!
veor.u32 q4, q4, q0
vshr.u32 q0, q12, #19
vshl.u32 q1, q12, #32-19
veor.u32 q4, q4, q0
vshr.u32 q14, q5, #7
veor.u32 q4, q4, q1
vshl.u32 q0, q5, #32-7
vshr.u32 q1, q12, #10
veor.u32 q14, q14, q0
vshr.u32 q0, q5, #18
veor.u32 q4, q4, q1
veor.u32 q14, q14, q0
vshl.u32 q1, q5, #32-18
vst1.u32 {q13}, [r1]!
veor.u32 q14, q14, q1
vshr.u32 q0, q5, #3
vadd.u32 q1, q6, q4
veor.u32 q14, q14, q0
vshr.u32 q4, q13, #17
vshl.u32 q0, q13, #32-17
/* slot 21 = sigma0(slot 6) + slot 5 + sigma1(slot 19) */
vadd.u32 q14, q14, q1
vld1.u32 {q6}, [r4]!
/* q15 still holds 0x00000100 here: q5 = slot 6 + 0x100 */
vadd.u32 q5, q5, q15
veor.u32 q4, q4, q0
vshr.u32 q0, q13, #19
vshl.u32 q1, q13, #32-19
veor.u32 q4, q4, q0
vshr.u32 q15, q6, #7
vshl.u32 q0, q6, #32-7
veor.u32 q4, q4, q1
veor.u32 q15, q15, q0
vshr.u32 q1, q13, #10
vshr.u32 q0, q6, #18
veor.u32 q4, q4, q1
veor.u32 q15, q15, q0
vshl.u32 q1, q6, #32-18
vshr.u32 q0, q6, #3
veor.u32 q15, q15, q1
vadd.u32 q4, q4, q5
veor.u32 q15, q15, q0
vmov.u32 q5, #0x80000000
/* slot 22 = sigma0(slot 7) + sigma1(slot 20) + slot 6 + 0x100 */
vadd.u32 q15, q15, q4
vshr.u32 q4, q14, #17
vshl.u32 q0, q14, #32-17
vadd.u32 q6, q6, q9
vst1.u32 {q14}, [r1]!
vmov.u32 q7, #0x11000000
veor.u32 q4, q4, q0
vshr.u32 q0, q14, #19
vshl.u32 q1, q14, #32-19
vadd.u32 q6, q6, q7
vmov.u32 q2, #0x00002000
veor.u32 q4, q4, q0
vst1.u32 {q15}, [r1]!
veor.u32 q4, q4, q1
vshr.u32 q1, q14, #10
vadd.u32 q6, q6, q2
veor.u32 q1, q4, q1
/* r4 was at slot 8 after the six loads above; move it on to slot 16. */
add r4, r4, #8*16
vshr.u32 q4, q15, #17
vshl.u32 q0, q15, #32-17
/* slot 23 = slot 7 + slot 16 + 0x11000000 + 0x2000 + sigma1(slot 21) */
vadd.u32 q9, q6, q1
veor.u32 q4, q4, q0
vshr.u32 q0, q15, #19
vshl.u32 q1, q15, #32-19
veor.u32 q4, q4, q0
vst1.u32 {q9}, [r1]!
vadd.u32 q5, q5, q10
veor.u32 q4, q4, q1
vshr.u32 q1, q15, #10
vshl.u32 q0, q9, #32-17
veor.u32 q10, q4, q1
vshr.u32 q4, q9, #17
/* slot 24 = sigma1(slot 22) + slot 17 + 0x80000000 */
vadd.u32 q10, q10, q5
veor.u32 q4, q4, q0
vshr.u32 q0, q9, #19
vshl.u32 q1, q9, #32-19
veor.u32 q4, q4, q0
vshr.u32 q0, q9, #10
veor.u32 q4, q4, q1
vst1.u32 {q10}, [r1]!
veor.u32 q1, q4, q0
vshr.u32 q4, q10, #17
vshl.u32 q0, q10, #32-17
/* slot 25 = slot 18 + sigma1(slot 23) */
vadd.u32 q11, q11, q1
veor.u32 q4, q4, q0
vshr.u32 q0, q10, #19
vshl.u32 q1, q10, #32-19
veor.u32 q4, q4, q0
vst1.u32 {q11}, [r1]!
veor.u32 q4, q4, q1
vshr.u32 q1, q10, #10
vshl.u32 q0, q11, #32-17
veor.u32 q1, q4, q1
vshr.u32 q4, q11, #17
/* slot 26 = slot 19 + sigma1(slot 24) */
vadd.u32 q12, q12, q1
veor.u32 q4, q4, q0
vshr.u32 q0, q11, #19
vshl.u32 q1, q11, #32-19
veor.u32 q4, q4, q0
vshr.u32 q0, q11, #10
veor.u32 q4, q4, q1
vst1.u32 {q12}, [r1]!
veor.u32 q1, q4, q0
vshr.u32 q4, q12, #17
vshl.u32 q0, q12, #32-17
/* slot 27 = slot 20 + sigma1(slot 25) */
vadd.u32 q13, q13, q1
veor.u32 q4, q4, q0
vshr.u32 q0, q12, #19
vshl.u32 q1, q12, #32-19
veor.u32 q4, q4, q0
vst1.u32 {q13}, [r1]!
veor.u32 q4, q4, q1
vshr.u32 q1, q12, #10
vshl.u32 q0, q13, #32-17
veor.u32 q1, q4, q1
vshr.u32 q4, q13, #17
/* slot 28 = slot 21 + sigma1(slot 26) */
vadd.u32 q14, q14, q1
veor.u32 q4, q4, q0
vshr.u32 q0, q13, #19
vshl.u32 q1, q13, #32-19
veor.u32 q4, q4, q0
vshr.u32 q0, q13, #10
veor.u32 q4, q4, q1
vst1.u32 {q14}, [r1]!
veor.u32 q4, q4, q0
vmov.u32 q6, #0x00000100
/* slot 29 = slot 22 + sigma1(slot 27) */
vadd.u32 q15, q15, q4
vshr.u32 q4, q14, #17
vshl.u32 q0, q14, #32-17
vmov.u32 q7, #0x00400000
vst1.u32 {q15}, [r1]!
veor.u32 q4, q4, q0
vshr.u32 q0, q14, #19
vshl.u32 q1, q14, #32-19
veor.u32 q4, q4, q0
vadd.u32 q9, q9, q7
veor.u32 q4, q4, q1
vshr.u32 q1, q14, #10
vmov.u32 q2, #0x00000022
veor.u32 q4, q4, q1
vadd.u32 q9, q9, q2
vld1.u32 {q5}, [r4]!
/* slot 30 = slot 23 + 0x00400000 + 0x22 + sigma1(slot 28) */
vadd.u32 q9, q9, q4
vshr.u32 q4, q15, #17
vshl.u32 q0, q15, #32-17
vadd.u32 q6, q6, q10
vst1.u32 {q9}, [r1]!
veor.u32 q4, q4, q0
vshr.u32 q0, q15, #19
vshl.u32 q1, q15, #32-19
veor.u32 q4, q4, q0
vshr.u32 q10, q5, #7
veor.u32 q4, q4, q1
vshl.u32 q0, q5, #32-7
vshr.u32 q1, q15, #10
veor.u32 q10, q10, q0
vshr.u32 q0, q5, #18
veor.u32 q4, q4, q1
veor.u32 q10, q10, q0
vshl.u32 q1, q5, #32-18
vshr.u32 q0, q5, #3
veor.u32 q10, q10, q1
vadd.u32 q1, q6, q4
veor.u32 q10, q10, q0
vld1.u32 {q6}, [r4]!
/* q10 = sigma0(slot 16) + slot 24 + 0x100 + sigma1(slot 29); not stored here */
vadd.u32 q10, q10, q1
/* Continue with the shared extension rounds; q5/q6 hold slots 16 and 17. */
b sha256d_ms_4way_extend_loop2
.align 4
/*
 * Eight 16-byte rows, each one 32-bit word repeated in all four lanes.
 * The words are the SHA-256 initial hash values H0..H7.  The second pass of
 * sha256d_ms_4way loads the rows into q0-q7 as its starting state.
 */
sha256d_ms_4way_4h:
.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
.long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
.long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
.long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
.long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
.long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
/*
 * Set-up for the second set of compression rounds: load the eight rows of
 * sha256d_ms_4way_4h into q0-q7, point r1 at the stack buffer, and step r3
 * back by 64 vectors before entering the round loop at round 0.
 */
sha256d_ms_4way_extend_coda2:
adr r4, sha256d_ms_4way_4h
mov r1, sp
vldmia r4, {q0-q7}
/* Keep a copy of the last row in q15; sha256d_ms_4way_finish adds it back. */
vmov.u32 q15, q7
sub r3, r3, #64*16
b sha256d_ms_4way_main_loop2
/*
 * sha256_4way_main_round_red i, rk, rw, rd, re, rf, rg, rh
 *
 * Reduced round: only \rh is updated, all lanes in parallel:
 *   \rh += ((\rf & \re) | (\rg & ~\re))
 *        + [\rw] + \rd + [\rk]
 *        + rotr(\re ^ rotr(\re,5) ^ rotr(\re,19), 6)
 * One vector is read from \rw and one from \rk, both with post-increment.
 * The \i argument is not used in the body.  q8-q13 are used as scratch.
 */
.macro sha256_4way_main_round_red i, rk, rw, rd, re, rf, rg, rh
vld1.u32 {q8}, [\rw]!
vand.u32 q9, \rf, \re
vbic.u32 q10, \rg, \re
vshr.u32 q11, \re, #5
vorr.u32 q10, q10, q9
vshl.u32 q12, \re, #32-5
vadd.u32 \rh, \rh, q10
veor.u32 q10, \re, q11
vshr.u32 q11, \re, #19
veor.u32 q10, q10, q12
vshl.u32 q12, \re, #32-19
veor.u32 q10, q10, q11
vadd.u32 \rh, \rh, q8
veor.u32 q10, q10, q12
vld1.u32 {q9}, [\rk]!
vadd.u32 \rh, \rh, \rd
/* The two shifted halves have no overlapping bits, so adding both equals a rotate by 6. */
vshr.u32 q11, q10, #6
vadd.u32 \rh, \rh, q9
vshl.u32 q13, q10, #32-6
vadd.u32 \rh, \rh, q11
vadd.u32 \rh, \rh, q13
.endm
/*
 * Final stage of sha256d_ms_4way: four reduced rounds (57..60) that only
 * update one state register each, then q7 + q15 is written to the vector at
 * offset 7*16 from r0.  No other output vector is stored.
 */
sha256d_ms_4way_finish:
sha256_4way_main_round_red 57, r3, r1, q2, q7, q4, q5, q6
sha256_4way_main_round_red 58, r3, r1, q1, q6, q7, q4, q5
sha256_4way_main_round_red 59, r3, r1, q0, q5, q6, q7, q4
sha256_4way_main_round_red 60, r3, r1, q3, q4, q5, q6, q7
vadd.u32 q7, q7, q15
add r0, r0, #7*16
vst1.u32 {q7}, [r0]
/* Epilogue: restore the sp saved in r12, then q4-q7, r4 and return via pc. */
mov sp, r12
vpop {q4-q7}
ldmfd sp!, {r4, pc}
/*
 * sha256_use_4way: returns the constant 1 in r0.
 * NOTE(review): presumably tells the caller that the 4-way NEON routines
 * are available in this build -- confirm against the C side.
 */
.text
.code 32
.align 2
.globl sha256_use_4way
.globl _sha256_use_4way
#ifdef __ELF__
.type sha256_use_4way, %function
#endif
sha256_use_4way:
_sha256_use_4way:
mov r0, #1
bx lr
#endif /* __ARM_NEON__ */
#endif
07070100000029000081A4000003E800000064000000015EF4BCA10000B174000000000000000000000000000000000000001A00000000cpuminer-2.5.1/sha2-ppc.S/*
* Copyright 2014-2015 pooler@litecoinpool.org
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#if defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
/*
 * Assembler-dialect glue.
 * HI(name)/LO(name) select the high-adjusted and low 16-bit halves of an
 * address, using the operator spelling of the target assembler.
 * On non-Apple targets the symbolic register names rN (and vN when AltiVec
 * is enabled) are mapped to the bare register numbers.
 */
#ifdef __APPLE__
#define HI(name) ha16(name)
#define LO(name) lo16(name)
#else
#define HI(name) name@ha
#define LO(name) name@l
/* General-purpose registers. */
#define r0 0
#define r1 1
#define r2 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31
#ifdef __ALTIVEC__
/* AltiVec vector registers. */
#define v0 0
#define v1 1
#define v2 2
#define v3 3
#define v4 4
#define v5 5
#define v6 6
#define v7 7
#define v8 8
#define v9 9
#define v10 10
#define v11 11
#define v12 12
#define v13 13
#define v14 14
#define v15 15
#define v16 16
#define v17 17
#define v18 18
#define v19 19
#define v20 20
#define v21 21
#define v22 22
#define v23 23
#define v24 24
#define v25 25
#define v26 26
#define v27 27
#define v28 28
#define v29 29
#define v30 30
#define v31 31
#endif
#endif
/*
 * When none of the 64-bit target macros is defined, the doubleword
 * load/store mnemonics used for register save/restore are remapped to their
 * 32-bit word equivalents, so the same source assembles for both targets.
 */
#if !(defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || \
defined(__64BIT__) || defined(_LP64) || defined(__LP64__))
#define ld lwz
#define std stw
#define stdu stwu
#define stdux stwux
#endif
/*
 * Constant tables.  On AIX they are placed in a read-only csect and reached
 * through the TOC entries defined below; elsewhere they live in .data.
 */
#ifdef _AIX
.csect .text[RO]
#else
.data
#endif
.align 2
/* SHA-256 initial hash values H0..H7. */
sha256_h:
.long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a
.long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
.align 2
/* SHA-256 round constants K[0..63], four per line. */
sha256_k:
.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
#ifdef _AIX
/* TOC entries through which the AIX code loads the table addresses. */
.toc
T.sha256_h:
.tc sha256_h[TC], sha256_h
T.sha256_k:
.tc sha256_k[TC], sha256_k
#endif
/*
 * sha256_extend_doubleround i, rw, wo, ra, rb, ry, rz
 *
 * Produces two message-schedule words from the word array at \wo(\rw),
 * where
 *   sigma0(x) = rotr(x,7)  ^ rotr(x,18) ^ (x >> 3)
 *   sigma1(x) = rotr(x,17) ^ rotr(x,19) ^ (x >> 10)
 *
 *   \ra = r11 + \ra + sigma1(\ry) + sigma0(W[i+1])     -> stored as W[i+16]
 *   \rb = W[i+1] + \rb + sigma1(\rz) + sigma0(W[i+2])  -> stored as W[i+17]
 *
 * On entry r11 must hold W[i]; for the standard recurrence the callers pass
 * \ra = W[i+9], \rb = W[i+10], \ry = W[i+14], \rz = W[i+15].
 * On exit r11 holds W[i+2], ready for the next invocation.
 * r12, r13 and r14 are used as scratch.
 */
.macro sha256_extend_doubleround i, rw, wo, ra, rb, ry, rz
lwz r14, \wo+(\i+1)*4(\rw)
rotrwi r12, \ry, 17
rotrwi r13, \ry, 19
add r11, r11, \ra
xor r12, r12, r13
srwi r13, \ry, 10
rotrwi \ra, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
add r12, r12, r11
xor \ra, \ra, r13
srwi r13, r14, 3
lwz r11, \wo+(\i+2)*4(\rw)
xor \ra, \ra, r13
rotrwi r13, \rz, 19
add \ra, \ra, r12
rotrwi r12, \rz, 17
add r14, r14, \rb
xor r12, r12, r13
srwi r13, \rz, 10
rotrwi \rb, r11, 7
xor r12, r12, r13
rotrwi r13, r11, 18
stw \ra, \wo+(\i+16)*4(\rw)
xor \rb, \rb, r13
srwi r13, r11, 3
add r14, r14, r12
xor \rb, \rb, r13
add \rb, \rb, r14
stw \rb, \wo+(\i+17)*4(\rw)
.endm
/*
 * sha256_main_round i, rk, rw, wo, ra, rb, rc, rd, re, rf, rg, rh
 *
 * One SHA-256 compression round on the eight state registers.  With
 *   t = \rh + ((\rf & \re) | (\rg & ~\re)) + W[i] + K[i]
 *           + rotr(\re ^ rotr(\re,5) ^ rotr(\re,19), 6)
 * the macro leaves
 *   \rh = t + \rd
 *   \rd = t + ((\ra & \rb) ^ ((\ra ^ \rb) & \rc))
 *           + rotr(\ra ^ rotr(\ra,11) ^ rotr(\ra,20), 2)
 * W[i] is read from \wo+i*4(\rw) and K[i] from i*4(\rk).  The other six
 * state registers are left unchanged; callers rotate the register roles
 * between rounds instead of moving values.  r12-r15 are used as scratch.
 */
.macro sha256_main_round i, rk, rw, wo, ra, rb, rc, rd, re, rf, rg, rh
lwz r12, \wo+(\i)*4(\rw)
and r13, \rf, \re
andc r14, \rg, \re
lwz r15, (\i)*4(\rk)
or r14, r14, r13
rotrwi r13, \re, 5
add \rh, \rh, r14
xor r14, \re, r13
rotrwi r13, \re, 19
add \rh, \rh, r12
xor r14, r14, r13
add \rh, \rh, r15
rotrwi r13, r14, 6
xor r15, \ra, \rb
add \rh, \rh, r13
rotrwi r13, \ra, 11
and r15, r15, \rc
xor r12, \ra, r13
rotrwi r13, \ra, 20
and r14, \ra, \rb
xor r12, r12, r13
xor r14, r14, r15
rotrwi r13, r12, 2
add r15, \rh, r14
add \rh, \rh, \rd
add \rd, r15, r13
.endm
/*
 * sha256_main_quadround i, rk, rw, wo
 *
 * Four consecutive rounds (i .. i+3) with the state held in r4-r11.  The
 * register roles are rotated by one position per round within r4-r7 and
 * within r8-r11, so after four rounds they are back in the starting layout.
 */
.macro sha256_main_quadround i, rk, rw, wo
sha256_main_round \i+0, \rk, \rw, \wo, r4, r5, r6, r7, r8, r9, r10, r11
sha256_main_round \i+1, \rk, \rw, \wo, r7, r4, r5, r6, r11, r8, r9, r10
sha256_main_round \i+2, \rk, \rw, \wo, r6, r7, r4, r5, r10, r11, r8, r9
sha256_main_round \i+3, \rk, \rw, \wo, r5, r6, r7, r4, r9, r10, r11, r8
.endm
/*
 * sha256_transform (PowerPC)
 *
 * Arguments as used by the code:
 *   r3 - pointer to eight 32-bit state words; read, updated and written back
 *   r4 - pointer to sixteen 32-bit input words
 *   r5 - if non-zero, the input words are loaded byte-reversed (lwbrx)
 *
 * A 76-word stack frame is created; the 64-word message schedule W lives at
 * offset 8*4 from r1.  r13-r16 are saved in the frame and restored on exit.
 */
#ifdef _AIX
.csect .text[PR]
#else
.text
#endif
.align 2
.globl sha256_transform
.globl _sha256_transform
.globl .sha256_transform
#ifdef __ELF__
.type sha256_transform, %function
#endif
sha256_transform:
_sha256_transform:
.sha256_transform:
stdu r1, -76*4(r1)
/* Test the swap flag now; the branch is taken after the register saves. */
cmpwi 0, r5, 0
std r13, 2*4(r1)
std r14, 4*4(r1)
std r15, 6*4(r1)
std r16, 72*4(r1)
bne 0, sha256_transform_swap
/* No byte swap: copy the sixteen input words to W[0..15] as they are. */
lwz r11, 0*4(r4)
lwz r14, 1*4(r4)
lwz r15, 2*4(r4)
lwz r7, 3*4(r4)
lwz r8, 4*4(r4)
lwz r9, 5*4(r4)
lwz r10, 6*4(r4)
lwz r0, 7*4(r4)
lwz r12, 8*4(r4)
lwz r13, 9*4(r4)
lwz r5, 10*4(r4)
lwz r6, 11*4(r4)
stw r11, 8*4+0*4(r1)
stw r14, 8*4+1*4(r1)
stw r15, 8*4+2*4(r1)
stw r7, 8*4+3*4(r1)
stw r8, 8*4+4*4(r1)
stw r9, 8*4+5*4(r1)
stw r10, 8*4+6*4(r1)
stw r0, 8*4+7*4(r1)
stw r12, 8*4+8*4(r1)
stw r13, 8*4+9*4(r1)
stw r5, 8*4+10*4(r1)
stw r6, 8*4+11*4(r1)
lwz r7, 12*4(r4)
lwz r8, 13*4(r4)
lwz r9, 14*4(r4)
lwz r10, 15*4(r4)
/* r4 is no longer needed as a pointer; it now carries W[9] for the extension. */
mr r4, r13
stw r7, 8*4+12*4(r1)
stw r8, 8*4+13*4(r1)
stw r9, 8*4+14*4(r1)
stw r10, 8*4+15*4(r1)
b sha256_transform_extend
/* Byte-swapping path: same copy, four words at a time, using lwbrx. */
sha256_transform_swap:
/* r13-r15 hold the byte offsets 4, 8 and 12 for the indexed loads. */
li r13, 1*4
li r14, 2*4
li r15, 3*4
lwbrx r11, 0, r4
lwbrx r7, r4, r13
lwbrx r8, r4, r14
lwbrx r9, r4, r15
addi r4, r4, 4*4
stw r11, 8*4+0*4(r1)
stw r7, 8*4+1*4(r1)
stw r8, 8*4+2*4(r1)
stw r9, 8*4+3*4(r1)
lwbrx r7, 0, r4
lwbrx r8, r4, r13
lwbrx r9, r4, r14
lwbrx r10, r4, r15
addi r4, r4, 4*4
stw r7, 8*4+4*4(r1)
stw r8, 8*4+5*4(r1)
stw r9, 8*4+6*4(r1)
stw r10, 8*4+7*4(r1)
lwbrx r8, 0, r4
lwbrx r12, r4, r13
lwbrx r5, r4, r14
lwbrx r6, r4, r15
addi r4, r4, 4*4
stw r8, 8*4+8*4(r1)
stw r12, 8*4+9*4(r1)
stw r5, 8*4+10*4(r1)
stw r6, 8*4+11*4(r1)
lwbrx r7, 0, r4
lwbrx r8, r4, r13
lwbrx r9, r4, r14
lwbrx r10, r4, r15
/* As in the other path, r4 now carries W[9]. */
mr r4, r12
stw r7, 8*4+12*4(r1)
stw r8, 8*4+13*4(r1)
stw r9, 8*4+14*4(r1)
stw r10, 8*4+15*4(r1)
/*
 * Both paths arrive with r11 = W[0], r4 = W[9], r5 = W[10], r6 = W[11] and
 * r7-r10 = W[12..15].  The 24 double rounds below compute W[16..63].
 */
sha256_transform_extend:
sha256_extend_doubleround 0, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 2, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 4, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 6, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 8, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 10, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 12, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 14, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 16, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 18, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 20, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 22, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 24, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 26, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 28, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 30, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 32, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 34, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 36, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 38, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 40, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 42, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 44, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 46, r1, 8*4, r8, r9, r6, r7
/* Load the eight state words into r4-r11. */
lwz r4, 0*4(r3)
lwz r5, 1*4(r3)
lwz r6, 2*4(r3)
lwz r7, 3*4(r3)
lwz r8, 4*4(r3)
lwz r9, 5*4(r3)
lwz r10, 6*4(r3)
lwz r11, 7*4(r3)
/* r16 = address of the sha256_k table. */
#ifdef _AIX
ld r16, T.sha256_k(r2)
#else
lis r16, HI(sha256_k)
addi r16, r16, LO(sha256_k)
#endif
/* 64 compression rounds, four per macro invocation. */
sha256_main_quadround 0, r16, r1, 8*4
sha256_main_quadround 4, r16, r1, 8*4
sha256_main_quadround 8, r16, r1, 8*4
sha256_main_quadround 12, r16, r1, 8*4
sha256_main_quadround 16, r16, r1, 8*4
sha256_main_quadround 20, r16, r1, 8*4
sha256_main_quadround 24, r16, r1, 8*4
sha256_main_quadround 28, r16, r1, 8*4
sha256_main_quadround 32, r16, r1, 8*4
sha256_main_quadround 36, r16, r1, 8*4
sha256_main_quadround 40, r16, r1, 8*4
sha256_main_quadround 44, r16, r1, 8*4
sha256_main_quadround 48, r16, r1, 8*4
sha256_main_quadround 52, r16, r1, 8*4
sha256_main_quadround 56, r16, r1, 8*4
sha256_main_quadround 60, r16, r1, 8*4
/* Feed-forward: add the previous state words and write the sums back. */
lwz r12, 0*4(r3)
lwz r13, 1*4(r3)
lwz r14, 2*4(r3)
lwz r15, 3*4(r3)
add r4, r4, r12
add r5, r5, r13
add r6, r6, r14
add r7, r7, r15
stw r4, 0*4(r3)
stw r5, 1*4(r3)
stw r6, 2*4(r3)
stw r7, 3*4(r3)
lwz r12, 4*4(r3)
lwz r13, 5*4(r3)
lwz r14, 6*4(r3)
lwz r15, 7*4(r3)
add r8, r8, r12
add r9, r9, r13
add r10, r10, r14
add r11, r11, r15
stw r8, 4*4(r3)
stw r9, 5*4(r3)
stw r10, 6*4(r3)
stw r11, 7*4(r3)
/* Restore the saved registers, release the frame and return. */
ld r13, 2*4(r1)
ld r14, 4*4(r1)
ld r15, 6*4(r1)
ld r16, 72*4(r1)
addi r1, r1, 76*4
blr
/*
 * sha256d_ms (PowerPC): everything up to and including round 56 of the
 * second set of compression rounds.
 *
 * Arguments as used by the code:
 *   r3 - output pointer; only the word at offset 7*4 is written (at exit)
 *   r4 - pointer to a table of 32-bit words (kept in r17); words 18..31 are
 *        overwritten during the first schedule and words 18-20, 22-24 and
 *        30-31 are restored afterwards
 *   r5 - pointer to eight words (kept in r18) added to the state after the
 *        first set of rounds
 *   r6 - pointer to eight words (kept in r16) loaded as the state before
 *        the first set of rounds, which starts at round 3
 * NOTE(review): presumably (hash, data/W, midstate, prehash) in the C
 * prototype -- confirm against the caller.
 *
 * An 80-word frame is created; a 64-word work area W2 lives at offset 8*4
 * from r1.  r13-r18 are saved in the frame.
 *
 * Notation:
 *   sigma0(x) = rotr(x,7)  ^ rotr(x,18) ^ (x >> 3)
 *   sigma1(x) = rotr(x,17) ^ rotr(x,19) ^ (x >> 10)
 *   W[n]  = word n of the r17 table, W2[n] = word n of the stack work area
 */
.align 2
.globl sha256d_ms
.globl _sha256d_ms
.globl .sha256d_ms
#ifdef __ELF__
.type sha256d_ms, %function
#endif
sha256d_ms:
_sha256d_ms:
.sha256d_ms:
stdu r1, -80*4(r1)
std r13, 2*4(r1)
std r14, 4*4(r1)
std r15, 6*4(r1)
std r16, 72*4(r1)
std r17, 74*4(r1)
std r18, 76*4(r1)
mr r17, r4
mr r18, r5
mr r16, r6
/*
 * First schedule, words 18..31.  The original W[18], W[19], W[20], W[22],
 * W[23], W[24], W[30] and W[31] are first saved in W2[18..25], in that
 * order, so that they can be restored later.
 */
lwz r14, 3*4(r17)
lwz r6, 18*4(r17)
lwz r7, 19*4(r17)
rotrwi r12, r14, 7
rotrwi r13, r14, 18
stw r6, 8*4+18*4(r1)
xor r12, r12, r13
srwi r13, r14, 3
stw r7, 8*4+19*4(r1)
xor r12, r12, r13
lwz r8, 20*4(r17)
/* W[18] += sigma0(W[3]); W[19] += W[3] */
add r6, r6, r12
lwz r10, 22*4(r17)
add r7, r7, r14
stw r6, 18*4(r17)
rotrwi r12, r6, 17
rotrwi r13, r6, 19
stw r7, 19*4(r17)
xor r12, r12, r13
srwi r13, r6, 10
stw r8, 8*4+20*4(r1)
xor r12, r12, r13
lwz r4, 23*4(r17)
/* W[20] += sigma1(W[18]) */
add r8, r8, r12
lwz r5, 24*4(r17)
rotrwi r9, r7, 17
rotrwi r13, r7, 19
stw r8, 20*4(r17)
xor r9, r9, r13
srwi r13, r7, 10
stw r10, 8*4+21*4(r1)
/* W[21] = sigma1(W[19]) */
xor r9, r9, r13
stw r4, 8*4+22*4(r1)
rotrwi r12, r8, 17
rotrwi r13, r8, 19
stw r9, 21*4(r17)
xor r12, r12, r13
srwi r13, r8, 10
stw r5, 8*4+23*4(r1)
xor r12, r12, r13
rotrwi r14, r9, 17
rotrwi r13, r9, 19
/* W[22] += sigma1(W[20]) */
add r10, r10, r12
lwz r11, 30*4(r17)
xor r14, r14, r13
srwi r13, r9, 10
stw r10, 22*4(r17)
xor r14, r14, r13
stw r11, 8*4+24*4(r1)
/* W[23] += sigma1(W[21]) */
add r4, r4, r14
rotrwi r12, r10, 17
rotrwi r13, r10, 19
stw r4, 23*4(r17)
xor r12, r12, r13
srwi r13, r10, 10
rotrwi r14, r4, 17
xor r12, r12, r13
rotrwi r13, r4, 19
xor r14, r14, r13
srwi r13, r4, 10
/* W[24] += sigma1(W[22]); W[25] = W[18] + sigma1(W[23]) */
add r5, r5, r12
xor r14, r14, r13
stw r5, 24*4(r17)
add r6, r6, r14
rotrwi r12, r5, 17
rotrwi r13, r5, 19
stw r6, 25*4(r17)
xor r12, r12, r13
srwi r13, r5, 10
rotrwi r14, r6, 17
xor r12, r12, r13
rotrwi r13, r6, 19
xor r14, r14, r13
srwi r13, r6, 10
/* W[26] = W[19] + sigma1(W[24]); W[27] = W[20] + sigma1(W[25]) */
add r7, r7, r12
xor r14, r14, r13
stw r7, 26*4(r17)
add r8, r8, r14
rotrwi r12, r7, 17
rotrwi r13, r7, 19
stw r8, 27*4(r17)
xor r12, r12, r13
srwi r13, r7, 10
rotrwi r14, r8, 17
xor r12, r12, r13
rotrwi r13, r8, 19
xor r14, r14, r13
srwi r13, r8, 10
/* W[28] = W[21] + sigma1(W[26]); W[29] = W[22] + sigma1(W[27]) */
add r9, r9, r12
xor r14, r14, r13
stw r9, 28*4(r17)
add r10, r10, r14
lwz r14, 31*4(r17)
rotrwi r12, r9, 17
rotrwi r13, r9, 19
stw r10, 29*4(r17)
xor r12, r12, r13
srwi r13, r9, 10
stw r14, 8*4+25*4(r1)
xor r12, r12, r13
add r11, r11, r12
add r5, r5, r14
rotrwi r12, r10, 17
rotrwi r13, r10, 19
/* W[30] = W[23] + original W[30] + sigma1(W[28]) */
add r4, r4, r11
/* r11 = W[16], as required on entry to the first double round below. */
lwz r11, 16*4(r17)
xor r12, r12, r13
srwi r13, r10, 10
stw r4, 30*4(r17)
xor r12, r12, r13
/* W[31] = W[24] + original W[31] + sigma1(W[29]) */
add r5, r5, r12
stw r5, 31*4(r17)
/* Generic extension for W[32..63]. */
sha256_extend_doubleround 16, r17, 0, r6, r7, r4, r5
sha256_extend_doubleround 18, r17, 0, r8, r9, r6, r7
sha256_extend_doubleround 20, r17, 0, r10, r4, r8, r9
sha256_extend_doubleround 22, r17, 0, r5, r6, r10, r4
sha256_extend_doubleround 24, r17, 0, r7, r8, r5, r6
sha256_extend_doubleround 26, r17, 0, r9, r10, r7, r8
sha256_extend_doubleround 28, r17, 0, r4, r5, r9, r10
sha256_extend_doubleround 30, r17, 0, r6, r7, r4, r5
sha256_extend_doubleround 32, r17, 0, r8, r9, r6, r7
sha256_extend_doubleround 34, r17, 0, r10, r4, r8, r9
sha256_extend_doubleround 36, r17, 0, r5, r6, r10, r4
sha256_extend_doubleround 38, r17, 0, r7, r8, r5, r6
sha256_extend_doubleround 40, r17, 0, r9, r10, r7, r8
sha256_extend_doubleround 42, r17, 0, r4, r5, r9, r10
sha256_extend_doubleround 44, r17, 0, r6, r7, r4, r5
sha256_extend_doubleround 46, r17, 0, r8, r9, r6, r7
/*
 * Load the starting state from r16.  The words are placed in the register
 * layout that the round-3 invocation below expects, which is why the
 * destination registers are not in ascending order.
 */
lwz r4, 0*4(r16)
lwz r9, 1*4(r16)
lwz r10, 2*4(r16)
lwz r11, 3*4(r16)
lwz r8, 4*4(r16)
lwz r5, 5*4(r16)
lwz r6, 6*4(r16)
lwz r7, 7*4(r16)
/* r16 is reused as the address of the sha256_k table from here on. */
#ifdef _AIX
ld r16, T.sha256_k(r2)
#else
lis r16, HI(sha256_k)
addi r16, r16, LO(sha256_k)
#endif
/* First set of rounds: 3..63 over the r17 table. */
sha256_main_round 3, r16, r17, 0, r5, r6, r7, r4, r9, r10, r11, r8
sha256_main_quadround 4, r16, r17, 0
sha256_main_quadround 8, r16, r17, 0
sha256_main_quadround 12, r16, r17, 0
sha256_main_quadround 16, r16, r17, 0
sha256_main_quadround 20, r16, r17, 0
sha256_main_quadround 24, r16, r17, 0
sha256_main_quadround 28, r16, r17, 0
sha256_main_quadround 32, r16, r17, 0
sha256_main_quadround 36, r16, r17, 0
sha256_main_quadround 40, r16, r17, 0
sha256_main_quadround 44, r16, r17, 0
sha256_main_quadround 48, r16, r17, 0
sha256_main_quadround 52, r16, r17, 0
sha256_main_quadround 56, r16, r17, 0
sha256_main_quadround 60, r16, r17, 0
/* Add the eight words at r18 and store the sums as W2[0..7]. */
lwz r12, 0*4(r18)
lwz r13, 1*4(r18)
lwz r14, 2*4(r18)
lwz r15, 3*4(r18)
add r4, r4, r12
add r5, r5, r13
add r6, r6, r14
add r7, r7, r15
stw r4, 8*4+0*4(r1)
stw r5, 8*4+1*4(r1)
stw r6, 8*4+2*4(r1)
stw r7, 8*4+3*4(r1)
lwz r12, 4*4(r18)
lwz r13, 5*4(r18)
lwz r14, 6*4(r18)
lwz r15, 7*4(r18)
add r8, r8, r12
add r9, r9, r13
add r10, r10, r14
add r11, r11, r15
stw r8, 8*4+4*4(r1)
stw r9, 8*4+5*4(r1)
stw r10, 8*4+6*4(r1)
stw r11, 8*4+7*4(r1)
/* Restore the eight caller words that were saved in W2[18..25]. */
lwz r4, 8*4+18*4(r1)
lwz r5, 8*4+19*4(r1)
lwz r6, 8*4+20*4(r1)
lwz r7, 8*4+21*4(r1)
lwz r8, 8*4+22*4(r1)
lwz r9, 8*4+23*4(r1)
lwz r10, 8*4+24*4(r1)
lwz r11, 8*4+25*4(r1)
stw r4, 18*4(r17)
stw r5, 19*4(r17)
stw r6, 20*4(r17)
stw r7, 22*4(r17)
stw r8, 23*4(r17)
stw r9, 24*4(r17)
stw r10, 30*4(r17)
stw r11, 31*4(r17)
/*
 * Second schedule.  W2[8] = 0x80000000, W2[9..14] = 0, W2[15] = 0x100.
 * NOTE(review): presumably SHA-256 padding for a 256-bit message -- confirm.
 * Because these words are constants, their contributions to W2[16..31] are
 * folded in below as immediates instead of being loaded.
 */
lis r8, 0x8000
li r9, 0
li r10, 0x0100
lwz r14, 8*4+1*4(r1)
lwz r4, 8*4+0*4(r1)
lwz r11, 8*4+2*4(r1)
rotrwi r12, r14, 7
rotrwi r13, r14, 18
stw r8, 8*4+8*4(r1)
stw r9, 8*4+9*4(r1)
stw r9, 8*4+10*4(r1)
stw r9, 8*4+11*4(r1)
stw r9, 8*4+12*4(r1)
stw r9, 8*4+13*4(r1)
stw r9, 8*4+14*4(r1)
stw r10, 8*4+15*4(r1)
xor r12, r12, r13
srwi r13, r14, 3
/* r5 = W2[1] + 0x00a00000 */
addis r5, r14, 0x00a0
xor r12, r12, r13
rotrwi r14, r11, 7
rotrwi r13, r11, 18
/* W2[16] = W2[0] + sigma0(W2[1]) */
add r4, r4, r12
xor r14, r14, r13
srwi r13, r11, 3
stw r4, 8*4+16*4(r1)
xor r14, r14, r13
rotrwi r12, r4, 17
rotrwi r13, r4, 19
/* W2[17] = W2[1] + 0x00a00000 + sigma0(W2[2]) */
add r5, r5, r14
lwz r14, 8*4+3*4(r1)
stw r5, 8*4+17*4(r1)
xor r12, r12, r13
srwi r13, r4, 10
rotrwi r6, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
xor r6, r6, r13
srwi r13, r14, 3
add r11, r11, r12
xor r6, r6, r13
rotrwi r12, r5, 17
rotrwi r13, r5, 19
/* W2[18] = sigma0(W2[3]) + W2[2] + sigma1(W2[16]) */
add r6, r6, r11
lwz r11, 8*4+4*4(r1)
stw r6, 8*4+18*4(r1)
xor r12, r12, r13
srwi r13, r5, 10
rotrwi r7, r11, 7
xor r12, r12, r13
rotrwi r13, r11, 18
xor r7, r7, r13
srwi r13, r11, 3
add r14, r14, r12
xor r7, r7, r13
rotrwi r12, r6, 17
rotrwi r13, r6, 19
/* W2[19] = sigma0(W2[4]) + W2[3] + sigma1(W2[17]) */
add r7, r7, r14
lwz r14, 8*4+5*4(r1)
stw r7, 8*4+19*4(r1)
xor r12, r12, r13
srwi r13, r6, 10
rotrwi r8, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
xor r8, r8, r13
srwi r13, r14, 3
add r11, r11, r12
xor r8, r8, r13
rotrwi r12, r7, 17
rotrwi r13, r7, 19
/* W2[20] = sigma0(W2[5]) + W2[4] + sigma1(W2[18]) */
add r8, r8, r11
lwz r11, 8*4+6*4(r1)
stw r8, 8*4+20*4(r1)
xor r12, r12, r13
srwi r13, r7, 10
rotrwi r9, r11, 7
xor r12, r12, r13
rotrwi r13, r11, 18
xor r9, r9, r13
srwi r13, r11, 3
add r14, r14, r12
xor r9, r9, r13
rotrwi r12, r8, 17
rotrwi r13, r8, 19
/* W2[21] = sigma0(W2[6]) + W2[5] + sigma1(W2[19]) */
add r9, r9, r14
lwz r14, 8*4+7*4(r1)
stw r9, 8*4+21*4(r1)
xor r12, r12, r13
srwi r13, r8, 10
rotrwi r10, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
xor r10, r10, r13
srwi r13, r14, 3
add r11, r11, r12
xor r10, r10, r13
rotrwi r12, r9, 17
rotrwi r13, r9, 19
addi r11, r11, 0x0100
add r14, r14, r4
/* W2[22] = sigma0(W2[7]) + W2[6] + sigma1(W2[20]) + 0x100 */
add r10, r10, r11
xor r12, r12, r13
srwi r13, r9, 10
stw r10, 8*4+22*4(r1)
addis r14, r14, 0x1100
xor r12, r12, r13
add r14, r14, r12
rotrwi r12, r10, 17
rotrwi r13, r10, 19
/* W2[23] = W2[7] + W2[16] + 0x11000000 + 0x2000 + sigma1(W2[21]) */
addi r4, r14, 0x2000
xor r12, r12, r13
srwi r13, r10, 10
stw r4, 8*4+23*4(r1)
addis r5, r5, 0x8000
xor r12, r12, r13
/* W2[24] = W2[17] + 0x80000000 + sigma1(W2[22]) */
add r5, r5, r12
rotrwi r12, r4, 17
rotrwi r13, r4, 19
stw r5, 8*4+24*4(r1)
xor r12, r12, r13
srwi r13, r4, 10
rotrwi r11, r5, 17
xor r12, r12, r13
rotrwi r13, r5, 19
xor r11, r11, r13
srwi r13, r5, 10
/* W2[25] = W2[18] + sigma1(W2[23]); W2[26] = W2[19] + sigma1(W2[24]) */
add r6, r6, r12
xor r11, r11, r13
stw r6, 8*4+25*4(r1)
add r7, r7, r11
rotrwi r12, r6, 17
rotrwi r13, r6, 19
stw r7, 8*4+26*4(r1)
xor r12, r12, r13
srwi r13, r6, 10
rotrwi r11, r7, 17
xor r12, r12, r13
rotrwi r13, r7, 19
xor r11, r11, r13
srwi r13, r7, 10
/* W2[27] = W2[20] + sigma1(W2[25]); W2[28] = W2[21] + sigma1(W2[26]) */
add r8, r8, r12
xor r11, r11, r13
stw r8, 8*4+27*4(r1)
add r9, r9, r11
rotrwi r14, r8, 17
rotrwi r13, r8, 19
rotrwi r12, r9, 17
stw r9, 8*4+28*4(r1)
addis r4, r4, 0x0040
xor r14, r14, r13
rotrwi r13, r9, 19
xor r12, r12, r13
srwi r13, r8, 10
xor r14, r14, r13
srwi r13, r9, 10
xor r12, r12, r13
addi r4, r4, 0x0022
/* W2[29] = W2[22] + sigma1(W2[27]) */
add r10, r10, r14
/* W2[30] = W2[23] + 0x00400000 + 0x22 + sigma1(W2[28]) */
add r4, r4, r12
lwz r11, 8*4+16*4(r1)
addi r5, r5, 0x0100
stw r4, 8*4+30*4(r1)
rotrwi r14, r11, 7
stw r10, 8*4+29*4(r1)
rotrwi r13, r11, 18
rotrwi r12, r10, 17
xor r14, r14, r13
rotrwi r13, r10, 19
xor r12, r12, r13
srwi r13, r11, 3
xor r14, r14, r13
srwi r13, r10, 10
xor r12, r12, r13
/* W2[31] = W2[24] + 0x100 + sigma0(W2[16]) + sigma1(W2[29]) */
add r5, r5, r14
add r5, r5, r12
stw r5, 8*4+31*4(r1)
/* Generic extension for W2[32..59]; r11 still holds W2[16] on entry. */
sha256_extend_doubleround 16, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 18, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 20, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 22, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 24, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 26, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 28, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 30, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 32, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 34, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 36, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 38, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 40, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 42, r1, 8*4, r4, r5, r9, r10
/* r18 is reused as the address of the sha256_h table. */
#ifdef _AIX
ld r18, T.sha256_h(r2)
#else
lis r18, HI(sha256_h)
addi r18, r18, LO(sha256_h)
#endif
/*
 * Hand-written half of the i = 44 double round: only W2[60] is computed
 * (W2[61..63] are never produced), interleaved with loading words 4..7 of
 * sha256_h into r8-r11.
 */
lwz r14, 8*4+(44+1)*4(r1)
rotrwi r12, r4, 17
rotrwi r13, r4, 19
add r15, r11, r6
rotrwi r6, r14, 7
rotrwi r11, r14, 18
xor r12, r12, r13
xor r6, r6, r11
lwz r8, 4*4(r18)
lwz r9, 5*4(r18)
lwz r10, 6*4(r18)
lwz r11, 7*4(r18)
srwi r13, r4, 10
srwi r14, r14, 3
xor r12, r12, r13
xor r6, r6, r14
add r12, r12, r15
add r6, r6, r12
stw r6, 8*4+(44+16)*4(r1)
/* Words 0..3 of sha256_h go to r4-r7, completing the starting state. */
lwz r4, 0*4(r18)
lwz r5, 1*4(r18)
lwz r6, 2*4(r18)
lwz r7, 3*4(r18)
/* Second set of rounds: 0..56 in full; the remaining rounds follow below. */
sha256_main_quadround 0, r16, r1, 8*4
sha256_main_quadround 4, r16, r1, 8*4
sha256_main_quadround 8, r16, r1, 8*4
sha256_main_quadround 12, r16, r1, 8*4
sha256_main_quadround 16, r16, r1, 8*4
sha256_main_quadround 20, r16, r1, 8*4
sha256_main_quadround 24, r16, r1, 8*4
sha256_main_quadround 28, r16, r1, 8*4
sha256_main_quadround 32, r16, r1, 8*4
sha256_main_quadround 36, r16, r1, 8*4
sha256_main_quadround 40, r16, r1, 8*4
sha256_main_quadround 44, r16, r1, 8*4
sha256_main_quadround 48, r16, r1, 8*4
sha256_main_quadround 52, r16, r1, 8*4
sha256_main_round 56, r16, r1, 8*4, r4, r5, r6, r7, r8, r9, r10, r11
/*
 * Reduced SHA-256 round: only the "e" half of a round is computed.
 *   \rh = \rh + \rd + Ch(\re, \rf, \rg) + Sigma1(\re) + W[\i] + K[\i]
 * W[\i] is read from \wo+(\i)*4(\rw) and K[\i] from (\i)*4(\rk).
 * The Sigma0/Maj part of a full round is not computed here.
 * Clobbers r12, r13, r14 and r15.
 */
.macro sha256_main_round_red i, rk, rw, wo, rd, re, rf, rg, rh
lwz r12, \wo+(\i)*4(\rw)
/* Ch(e, f, g) = (f & e) | (g & ~e), built in r14 */
and r15, \rf, \re
andc r14, \rg, \re
add \rh, \rh, \rd
or r14, r14, r15
lwz r15, (\i)*4(\rk)
/* Sigma1(e): ((e ^ ror5(e) ^ ror19(e)) ror 6) == ror6 ^ ror11 ^ ror25 */
rotrwi r13, \re, 5
add \rh, \rh, r14
xor r14, \re, r13
rotrwi r13, \re, 19
add \rh, \rh, r12
xor r14, r14, r13
add \rh, \rh, r15
rotrwi r13, r14, 6
add \rh, \rh, r13
.endm
sha256_main_round_red 57, r16, r1, 8*4, r6, r11, r8, r9, r10
sha256_main_round_red 58, r16, r1, 8*4, r5, r10, r11, r8, r9
sha256_main_round_red 59, r16, r1, 8*4, r4, r9, r10, r11, r8
lwz r5, 7*4(r18)
sha256_main_round_red 60, r16, r1, 8*4, r7, r8, r9, r10, r11
add r11, r11, r5
stw r11, 7*4(r3)
ld r13, 2*4(r1)
ld r14, 4*4(r1)
ld r15, 6*4(r1)
ld r16, 72*4(r1)
ld r17, 74*4(r1)
ld r18, 76*4(r1)
addi r1, r1, 80*4
blr
#ifdef __ALTIVEC__
#ifdef __APPLE__
.machine ppc7400
#endif
#ifdef _AIX
.csect .text[RO]
#else
.data
#endif
/*
 * SHA-256 initial hash values H0..H7, each replicated in all four 32-bit
 * lanes of a 16-byte vector (one row per state word).
 */
.align 4
sha256_4h:
.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
.long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
.long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
.long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
.long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
.long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
/*
 * SHA-256 round constants K[0..63], each replicated in all four 32-bit
 * lanes of a 16-byte vector. Row i is read at byte offset i*16 by
 * sha256_4way_main_round.
 */
.align 4
sha256_4k:
.long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
.long 0x71374491, 0x71374491, 0x71374491, 0x71374491
.long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf
.long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5
.long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b
.long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1
.long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4
.long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5
.long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98
.long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01
.long 0x243185be, 0x243185be, 0x243185be, 0x243185be
.long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3
.long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74
.long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe
.long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7
.long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174
.long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1
.long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786
.long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6
.long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc
.long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f
.long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa
.long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc
.long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da
.long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152
.long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d
.long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8
.long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7
.long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3
.long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147
.long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351
.long 0x14292967, 0x14292967, 0x14292967, 0x14292967
.long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85
.long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138
.long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc
.long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13
.long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354
.long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb
.long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e
.long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85
.long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1
.long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b
.long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70
.long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3
.long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819
.long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624
.long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585
.long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070
.long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116
.long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08
.long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c
.long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5
.long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3
.long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a
.long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f
.long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3
.long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee
.long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f
.long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814
.long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208
.long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa
.long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb
.long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
.long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
/*
 * Four lane-replicated constants used by sha256d_ms_4way while it
 * pre-extends the message schedule of the second hash. Rows 0, 1 and 3 are
 * added into schedule words there; row 2 (0x80000000) is stored as message
 * word 8 of the second block.
 */
.align 4
sha256d_4preext2:
.long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000
.long 0x11002000, 0x11002000, 0x11002000, 0x11002000
.long 0x80000000, 0x80000000, 0x80000000, 0x80000000
.long 0x00400022, 0x00400022, 0x00400022, 0x00400022
/*
 * vperm control vector used by sha256_transform_4way when its swap flag is
 * set: selects bytes 3,2,1,0 of every 32-bit word, i.e. reverses the byte
 * order inside each word.
 */
.align 4
br_perm:
.long 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c
#ifdef _AIX
/*
 * AIX only: TOC entries holding the addresses of the tables above. The code
 * below loads them with "ld rN, T.<name>(r2)" instead of lis/addi.
 */
.toc
T.sha256_4h:
.tc sha256_4h[TC], sha256_4h
T.sha256_4k:
.tc sha256_4k[TC], sha256_4k
T.sha256d_4preext2:
.tc sha256d_4preext2[TC], sha256d_4preext2
T.br_perm:
.tc br_perm[TC], br_perm
#endif
/*
 * Load the shift/rotate counts used by the message-schedule code.
 * vrlw rotates left by the low 5 bits of each element, so a rotate right
 * by n is expressed as a rotate left by 32-n (or by -n).
 *   v0  = 10  shift right 10          (sigma1)
 *   v1  = -7  rotate right 7          (sigma0)
 *   v16 = 3   shift right 3           (sigma0)
 *   v17 = 15  rotate right 17         (sigma1)
 *   v18 = 14  rotate right 18         (sigma0)
 *   v19 = 13  rotate right 19         (sigma1; also used by the main rounds)
 */
.macro sha256_4way_extend_setup
vspltisw v0, 10
vspltisw v1, -7
vspltisw v16, 3
vspltisw v17, 15
vspltisw v18, 14
vspltisw v19, 13
.endm
/*
 * Compute two consecutive message-schedule vectors and advance \rw by two
 * 16-byte slots.
 *
 * Expected on entry (see the call sites):
 *   v11       = vector at 0(\rw)                (W[i])
 *   \va, \vb  = W[i+9], W[i+10]
 *   \vy, \vz  = W[i+14], W[i+15]
 *   r7, r8    = 1*16, 2*16   (load offsets for W[i+1], W[i+2])
 *   r10, r11  = 16*16, 17*16 (store offsets for W[i+16], W[i+17])
 *   v0, v1, v16-v19 as set by sha256_4way_extend_setup
 * On exit:
 *   \va = W[i+16] = sigma1(\vy) + W[i+9]  + sigma0(W[i+1]) + W[i]
 *   \vb = W[i+17] = sigma1(\vz) + W[i+10] + sigma0(W[i+2]) + W[i+1]
 *   v11 = W[i+2], ready for the next invocation
 * The \i argument is not referenced in the body; it only labels the call.
 * Clobbers v12, v13, v14.
 */
.macro sha256_4way_extend_doubleround i, rw, va, vb, vy, vz
lvx v14, \rw, r7
vrlw v12, \vy, v17
vrlw v13, \vy, v19
vadduwm v11, v11, \va
vxor v12, v12, v13
vsrw v13, \vy, v0
vrlw \va, v14, v1
vxor v12, v12, v13
vrlw v13, v14, v18
vadduwm v12, v12, v11
vxor \va, \va, v13
vsrw v13, v14, v16
lvx v11, \rw, r8
vxor \va, \va, v13
vrlw v13, \vz, v19
vadduwm \va, \va, v12
vrlw v12, \vz, v17
vadduwm v14, v14, \vb
vxor v12, v12, v13
vsrw v13, \vz, v0
vrlw \vb, v11, v1
vxor v12, v12, v13
vrlw v13, v11, v18
stvx \va, \rw, r10
vxor \vb, \vb, v13
vsrw v13, v11, v16
vadduwm v14, v14, v12
vxor \vb, \vb, v13
vadduwm \vb, \vb, v14
stvx \vb, \rw, r11
addi \rw, \rw, 2*16
.endm
/*
 * Load the rotate counts used by the compression rounds (vrlw rotates left,
 * so negative counts act as rotate right).
 *   v2  = 12   rotate right 20        (Sigma0, intermediate)
 *   v3  = -5   rotate right 5         (Sigma1, intermediate)
 *   v16 = -6   rotate right 6         (Sigma1, final)
 *   v17 = -11  rotate right 11        (Sigma0, intermediate)
 *   v18 = -2   rotate right 2         (Sigma0, final)
 * This overwrites v16-v18 from sha256_4way_extend_setup; v19 (= 13) is left
 * untouched and is still required by the rounds.
 */
.macro sha256_4way_main_setup
vspltisw v2, 12
vspltisw v3, -5
vspltisw v16, -6
vspltisw v17, -11
vspltisw v18, -2
.endm
/*
 * One full SHA-256 round on four lanes.
 *   \i        round number; W[\i] is read from \rw + \i*16 and K[\i] from
 *             \rk + \i*16
 *   \va..\vh  vector registers currently holding a, b, c, d, e, f, g, h
 * With T1 = h + Ch(e,f,g) + Sigma1(e) + W + K:
 *   \vh receives d + T1                     (the new e)
 *   \vd receives T1 + Sigma0(a) + Maj(a,b,c) (the new a)
 * The other six registers are unchanged; callers rotate the argument list
 * instead of moving data.
 * Requires v2, v3, v16-v18 from sha256_4way_main_setup and v19 = 13.
 * Clobbers r6 and v12-v15.
 */
.macro sha256_4way_main_round i, rk, rw, va, vb, vc, vd, ve, vf, vg, vh
li r6, (\i)*16
lvx v12, \rw, r6
/* Ch(e, f, g) = (f & e) | (g & ~e) */
vand v13, \vf, \ve
vandc v14, \vg, \ve
lvx v15, \rk, r6
vor v14, v14, v13
/* Sigma1(e) = (e ^ ror5(e) ^ ror19(e)) ror 6 */
vrlw v13, \ve, v3
vadduwm \vh, \vh, v14
vxor v14, \ve, v13
vrlw v13, \ve, v19
vadduwm \vh, \vh, v12
vxor v14, v14, v13
vadduwm \vh, \vh, v15
vrlw v13, v14, v16
/* Maj(a, b, c) = (a & b) ^ ((a ^ b) & c) */
vxor v15, \va, \vb
vadduwm \vh, \vh, v13
/* Sigma0(a) = (a ^ ror11(a) ^ ror20(a)) ror 2 */
vrlw v13, \va, v17
vand v15, v15, \vc
vxor v12, \va, v13
vrlw v13, \va, v2
vand v14, \va, \vb
vxor v12, v12, v13
vxor v14, v14, v15
vrlw v13, v12, v18
vadduwm v15, \vh, v14
vadduwm \vh, \vh, \vd
vadduwm \vd, v15, v13
.endm
/*
 * Four consecutive rounds \i .. \i+3 with the state in v4-v11.
 * On entry a..d are in v4..v7 and e..h in v8..v11. Each round rotates the
 * register roles by one position within each group of four, so after four
 * rounds the assignment is back to the entry layout.
 */
.macro sha256_4way_main_quadround i, rk, rw
sha256_4way_main_round \i+0, \rk, \rw, v4, v5, v6, v7, v8, v9, v10, v11
sha256_4way_main_round \i+1, \rk, \rw, v7, v4, v5, v6, v11, v8, v9, v10
sha256_4way_main_round \i+2, \rk, \rw, v6, v7, v4, v5, v10, v11, v8, v9
sha256_4way_main_round \i+3, \rk, \rw, v5, v6, v7, v4, v9, v10, v11, v8
.endm
#ifdef _AIX
.csect .text[PR]
#else
.text
#endif
/*
 * sha256_init_4way
 *   r3 = destination for eight 16-byte vectors (must be 16-byte aligned,
 *        since stvx ignores the low four address bits)
 * Copies the lane-replicated initial hash values from sha256_4h to the
 * buffer at r3. Uses v0-v7 and r4-r12; VRSAVE is preserved.
 */
.align 2
.globl sha256_init_4way
.globl _sha256_init_4way
.globl .sha256_init_4way
#ifdef __ELF__
.type sha256_init_4way, %function
#endif
sha256_init_4way:
_sha256_init_4way:
.sha256_init_4way:
/* Save VRSAVE (SPR 256) in r0 and mark v0-v7 as live */
mfspr r0, 256
oris r12, r0, 0xff00
mtspr 256, r12
#ifdef _AIX
ld r4, T.sha256_4h(r2)
#else
lis r4, HI(sha256_4h)
addi r4, r4, LO(sha256_4h)
#endif
/* Byte offsets of vectors 1..7 */
li r5, 1*16
li r6, 2*16
li r7, 3*16
li r8, 4*16
li r9, 5*16
li r10, 6*16
li r11, 7*16
lvx v0, 0, r4
lvx v1, r4, r5
lvx v2, r4, r6
lvx v3, r4, r7
lvx v4, r4, r8
lvx v5, r4, r9
lvx v6, r4, r10
lvx v7, r4, r11
stvx v0, 0, r3
stvx v1, r3, r5
stvx v2, r3, r6
stvx v3, r3, r7
stvx v4, r3, r8
stvx v5, r3, r9
stvx v6, r3, r10
stvx v7, r3, r11
/* Restore the caller's VRSAVE */
mtspr 256, r0
blr
/*
 * sha256_transform_4way
 *   r3 = state: eight 16-byte vectors, read and updated in place
 *   r4 = input block: sixteen 16-byte vectors (four interleaved messages)
 *   r5 = swap flag: when non-zero every 32-bit input word is byte-reversed
 *        (via br_perm) before use
 * Runs one SHA-256 compression on four lanes in parallel. The 64-entry
 * message schedule is built in a stack frame of 4*4+64*16 bytes (plus
 * padding so that the frame is 16-byte aligned); the schedule starts at
 * r1+4*4. Both r3 and r4 are accessed with lvx/stvx and so must be
 * 16-byte aligned. Uses v0-v19; VRSAVE is preserved.
 */
.align 2
.globl sha256_transform_4way
.globl _sha256_transform_4way
.globl .sha256_transform_4way
#ifdef __ELF__
.type sha256_transform_4way, %function
#endif
sha256_transform_4way:
_sha256_transform_4way:
.sha256_transform_4way:
/* Save VRSAVE (SPR 256) in r0 and mark v0-v19 as live */
mfspr r0, 256
oris r12, r0, 0xffff
ori r12, r12, 0xf000
mtspr 256, r12
/*
 * Allocate the frame and align r1 down to 16 bytes; the old r1 is stored
 * at 0(r1) as back chain. The cmpwi result (swap flag) is consumed by the
 * bne below; it overwrites the CR0 result of andi., which is not needed.
 */
andi. r6, r1, 15
cmpwi 0, r5, 0
li r7, -(4*4+64*16)
subf r6, r6, r7
stdux r1, r1, r6
/* Byte offsets of vectors 1..7 */
li r7, 1*16
li r8, 2*16
li r9, 3*16
li r10, 4*16
li r11, 5*16
li r12, 6*16
li r6, 7*16
bne 0, sha256_transform_4way_swap
/* No swap: copy W[0..7] to the frame; v11 keeps W[0] for the extension */
lvx v11, 0, r4
lvx v1, r4, r7
lvx v2, r4, r8
lvx v3, r4, r9
lvx v4, r4, r10
lvx v5, r4, r11
lvx v6, r4, r12
lvx v7, r4, r6
addi r5, r1, 4*4
stvx v11, 0, r5
stvx v1, r5, r7
stvx v2, r5, r8
stvx v3, r5, r9
stvx v4, r5, r10
stvx v5, r5, r11
stvx v6, r5, r12
stvx v7, r5, r6
/* Copy W[8..15]; v4..v10 keep W[9..15] for the extension */
addi r4, r4, 8*16
lvx v0, 0, r4
lvx v4, r4, r7
lvx v5, r4, r8
lvx v6, r4, r9
lvx v7, r4, r10
lvx v8, r4, r11
lvx v9, r4, r12
lvx v10, r4, r6
addi r4, r1, 4*4+8*16
stvx v0, 0, r4
stvx v4, r4, r7
stvx v5, r4, r8
stvx v6, r4, r9
stvx v7, r4, r10
stvx v8, r4, r11
stvx v9, r4, r12
stvx v10, r4, r6
b sha256_transform_4way_extend
sha256_transform_4way_swap:
/* Swap path: same copy, with each word byte-reversed through vperm */
#ifdef _AIX
ld r5, T.br_perm(r2)
#else
lis r5, HI(br_perm)
addi r5, r5, LO(br_perm)
#endif
lvx v19, 0, r5
lvx v11, 0, r4
lvx v1, r4, r7
lvx v2, r4, r8
lvx v3, r4, r9
lvx v4, r4, r10
lvx v5, r4, r11
lvx v6, r4, r12
lvx v7, r4, r6
vperm v11, v11, v11, v19
vperm v1, v1, v1, v19
vperm v2, v2, v2, v19
vperm v3, v3, v3, v19
vperm v4, v4, v4, v19
vperm v5, v5, v5, v19
vperm v6, v6, v6, v19
vperm v7, v7, v7, v19
addi r5, r1, 4*4
stvx v11, 0, r5
stvx v1, r5, r7
stvx v2, r5, r8
stvx v3, r5, r9
stvx v4, r5, r10
stvx v5, r5, r11
stvx v6, r5, r12
stvx v7, r5, r6
addi r4, r4, 8*16
lvx v0, 0, r4
lvx v4, r4, r7
lvx v5, r4, r8
lvx v6, r4, r9
lvx v7, r4, r10
lvx v8, r4, r11
lvx v9, r4, r12
lvx v10, r4, r6
vperm v0, v0, v0, v19
vperm v4, v4, v4, v19
vperm v5, v5, v5, v19
vperm v6, v6, v6, v19
vperm v7, v7, v7, v19
vperm v8, v8, v8, v19
vperm v9, v9, v9, v19
vperm v10, v10, v10, v19
addi r4, r1, 4*4+8*16
stvx v0, 0, r4
stvx v4, r4, r7
stvx v5, r4, r8
stvx v6, r4, r9
stvx v7, r4, r10
stvx v8, r4, r11
stvx v9, r4, r12
stvx v10, r4, r6
sha256_transform_4way_extend:
/*
 * Message-schedule extension: W[16..63]. r5 points at W[0] and is advanced
 * by each doubleround; r10/r11 are the store offsets for W[i+16]/W[i+17].
 * extend_setup also reloads v19, which the swap path used for br_perm.
 */
li r10, 16*16
li r11, 17*16
sha256_4way_extend_setup
sha256_4way_extend_doubleround 0, r5, v4, v5, v9, v10
sha256_4way_extend_doubleround 2, r5, v6, v7, v4, v5
sha256_4way_extend_doubleround 4, r5, v8, v9, v6, v7
sha256_4way_extend_doubleround 6, r5, v10, v4, v8, v9
sha256_4way_extend_doubleround 8, r5, v5, v6, v10, v4
sha256_4way_extend_doubleround 10, r5, v7, v8, v5, v6
sha256_4way_extend_doubleround 12, r5, v9, v10, v7, v8
sha256_4way_extend_doubleround 14, r5, v4, v5, v9, v10
sha256_4way_extend_doubleround 16, r5, v6, v7, v4, v5
sha256_4way_extend_doubleround 18, r5, v8, v9, v6, v7
sha256_4way_extend_doubleround 20, r5, v10, v4, v8, v9
sha256_4way_extend_doubleround 22, r5, v5, v6, v10, v4
sha256_4way_extend_doubleround 24, r5, v7, v8, v5, v6
sha256_4way_extend_doubleround 26, r5, v9, v10, v7, v8
sha256_4way_extend_doubleround 28, r5, v4, v5, v9, v10
sha256_4way_extend_doubleround 30, r5, v6, v7, v4, v5
sha256_4way_extend_doubleround 32, r5, v8, v9, v6, v7
sha256_4way_extend_doubleround 34, r5, v10, v4, v8, v9
sha256_4way_extend_doubleround 36, r5, v5, v6, v10, v4
sha256_4way_extend_doubleround 38, r5, v7, v8, v5, v6
sha256_4way_extend_doubleround 40, r5, v9, v10, v7, v8
sha256_4way_extend_doubleround 42, r5, v4, v5, v9, v10
sha256_4way_extend_doubleround 44, r5, v6, v7, v4, v5
sha256_4way_extend_doubleround 46, r5, v8, v9, v6, v7
/* Load the state a..h into v4..v11; r11 = address of state word 4 */
addi r11, r3, 4*16
lvx v4, 0, r3
lvx v5, r3, r7
lvx v6, r3, r8
lvx v7, r3, r9
lvx v8, 0, r11
lvx v9, r11, r7
lvx v10, r11, r8
lvx v11, r11, r9
#ifdef _AIX
ld r12, T.sha256_4k(r2)
#else
lis r12, HI(sha256_4k)
addi r12, r12, LO(sha256_4k)
#endif
/* r12 = round constants, r5 = W[0]; run all 64 rounds */
addi r5, r1, 4*4
sha256_4way_main_setup
sha256_4way_main_quadround 0, r12, r5
sha256_4way_main_quadround 4, r12, r5
sha256_4way_main_quadround 8, r12, r5
sha256_4way_main_quadround 12, r12, r5
sha256_4way_main_quadround 16, r12, r5
sha256_4way_main_quadround 20, r12, r5
sha256_4way_main_quadround 24, r12, r5
sha256_4way_main_quadround 28, r12, r5
sha256_4way_main_quadround 32, r12, r5
sha256_4way_main_quadround 36, r12, r5
sha256_4way_main_quadround 40, r12, r5
sha256_4way_main_quadround 44, r12, r5
sha256_4way_main_quadround 48, r12, r5
sha256_4way_main_quadround 52, r12, r5
sha256_4way_main_quadround 56, r12, r5
sha256_4way_main_quadround 60, r12, r5
/* Feed-forward: add the previous state and store the result back to r3 */
lvx v12, 0, r3
lvx v13, r3, r7
lvx v14, r3, r8
lvx v15, r3, r9
lvx v16, 0, r11
lvx v17, r11, r7
lvx v18, r11, r8
lvx v19, r11, r9
vadduwm v4, v4, v12
vadduwm v5, v5, v13
vadduwm v6, v6, v14
vadduwm v7, v7, v15
vadduwm v8, v8, v16
vadduwm v9, v9, v17
vadduwm v10, v10, v18
vadduwm v11, v11, v19
stvx v4, 0, r3
stvx v5, r3, r7
stvx v6, r3, r8
stvx v7, r3, r9
stvx v8, 0, r11
stvx v9, r11, r7
stvx v10, r11, r8
stvx v11, r11, r9
/* Pop the frame through the back chain and restore VRSAVE */
ld r1, 0(r1)
mtspr 256, r0
blr
/*
 * sha256d_ms_4way
 *   r3 = output: only the vector at offset 7*16 is written
 *   r4 = message-schedule buffer of 16-byte vectors; slots 18-20, 22-24,
 *        30 and 31 are overwritten during the first hash and restored
 *        before the second one, and slots 32..63 are written
 *   r5 = eight vectors added to the state after the first compression
 *        (presumably the midstate -- confirm against the C caller)
 *   r6 = eight vectors used as the working state entering round 3 of the
 *        first compression (presumably a pre-computed partial state --
 *        confirm against the C caller)
 * Performs a double SHA-256 on four lanes, specialised so that the first
 * compression starts at round 3 and the second one stops once the last
 * state word is known. All pointer arguments are accessed with lvx/stvx and
 * must therefore be 16-byte aligned. Uses a stack frame of 4*4+64*16 bytes
 * (plus alignment padding) and v0-v19; VRSAVE is preserved.
 */
.align 2
.globl sha256d_ms_4way
.globl _sha256d_ms_4way
.globl .sha256d_ms_4way
#ifdef __ELF__
.type sha256d_ms_4way, %function
#endif
sha256d_ms_4way:
_sha256d_ms_4way:
.sha256d_ms_4way:
/* Save VRSAVE (SPR 256) in r0 and mark v0-v19 as live */
mfspr r0, 256
oris r12, r0, 0xffff
ori r12, r12, 0xf000
mtspr 256, r12
/* Allocate the frame and align r1 down to 16 bytes (back chain at 0(r1)) */
andi. r12, r1, 15
li r11, -(4*4+64*16)
subf r12, r12, r11
stdux r1, r1, r12
li r7, 1*16
li r8, 2*16
li r9, 3*16
li r10, 16*16
li r11, 17*16
sha256_4way_extend_setup
/*
 * First hash, schedule words 18..31: the buffer at r4 already holds
 * partially extended values. Each original value that gets overwritten is
 * first saved in the frame (eight slots starting at r1+4*4+18*16, addressed
 * through r12) so it can be restored later.
 */
addi r4, r4, 2*16
addi r12, r1, 4*4+18*16
lvx v14, r4, r7
lvx v6, r4, r10
lvx v7, r4, r11
vrlw v12, v14, v1
vrlw v13, v14, v18
stvx v6, 0, r12
vxor v12, v12, v13
vsrw v13, v14, v16
stvx v7, r12, r7
vxor v12, v12, v13
vadduwm v6, v6, v12
vadduwm v7, v7, v14
stvx v6, r4, r10
vrlw v12, v6, v17
vrlw v13, v6, v19
stvx v7, r4, r11
addi r4, r4, 18*16
lvx v8, 0, r4
vxor v12, v12, v13
vsrw v13, v6, v0
stvx v8, r12, r8
vxor v12, v12, v13
vadduwm v8, v8, v12
vrlw v9, v7, v17
vrlw v13, v7, v19
stvx v8, 0, r4
vxor v9, v9, v13
vsrw v13, v7, v0
vxor v9, v9, v13
vrlw v12, v8, v17
vrlw v13, v8, v19
stvx v9, r4, r7
vxor v12, v12, v13
vsrw v13, v8, v0
lvx v10, r4, r8
lvx v4, r4, r9
vxor v12, v12, v13
stvx v10, r12, r9
addi r12, r12, 4*16
stvx v4, 0, r12
vrlw v14, v9, v17
vrlw v13, v9, v19
vadduwm v10, v10, v12
vxor v14, v14, v13
vsrw v13, v9, v0
stvx v10, r4, r8
vxor v14, v14, v13
vadduwm v4, v4, v14
vrlw v12, v10, v17
vrlw v13, v10, v19
stvx v4, r4, r9
vxor v12, v12, v13
vsrw v13, v10, v0
vrlw v14, v4, v17
vxor v12, v12, v13
vrlw v13, v4, v19
addi r4, r4, 4*16
lvx v5, 0, r4
vxor v14, v14, v13
stvx v5, r12, r7
vsrw v13, v4, v0
vadduwm v5, v5, v12
vxor v14, v14, v13
stvx v5, 0, r4
vadduwm v6, v6, v14
vrlw v12, v5, v17
vrlw v13, v5, v19
stvx v6, r4, r7
vxor v12, v12, v13
vsrw v13, v5, v0
vrlw v14, v6, v17
vxor v12, v12, v13
vrlw v13, v6, v19
vxor v14, v14, v13
vsrw v13, v6, v0
vadduwm v7, v7, v12
vxor v14, v14, v13
stvx v7, r4, r8
vadduwm v8, v8, v14
vrlw v12, v7, v17
vrlw v13, v7, v19
stvx v8, r4, r9
vxor v12, v12, v13
vsrw v13, v7, v0
vrlw v14, v8, v17
vxor v12, v12, v13
vrlw v13, v8, v19
vxor v14, v14, v13
vsrw v13, v8, v0
vadduwm v9, v9, v12
vxor v14, v14, v13
addi r4, r4, 4*16
stvx v9, 0, r4
vadduwm v10, v10, v14
vrlw v12, v9, v17
vrlw v13, v9, v19
stvx v10, r4, r7
vxor v12, v12, v13
vsrw v13, v9, v0
lvx v11, r4, r8
lvx v14, r4, r9
stvx v11, r12, r8
stvx v14, r12, r9
vxor v12, v12, v13
vadduwm v11, v11, v12
vadduwm v5, v5, v14
vrlw v12, v10, v17
vrlw v13, v10, v19
vadduwm v4, v4, v11
vxor v12, v12, v13
vsrw v13, v10, v0
stvx v4, r4, r8
vxor v12, v12, v13
vadduwm v5, v5, v12
stvx v5, r4, r9
/* r4 now points at slot 16; generic extension produces slots 32..63 */
addi r4, r4, -12*16
lvx v11, 0, r4
sha256_4way_extend_doubleround 16, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 18, r4, v8, v9, v6, v7
sha256_4way_extend_doubleround 20, r4, v10, v4, v8, v9
sha256_4way_extend_doubleround 22, r4, v5, v6, v10, v4
sha256_4way_extend_doubleround 24, r4, v7, v8, v5, v6
sha256_4way_extend_doubleround 26, r4, v9, v10, v7, v8
sha256_4way_extend_doubleround 28, r4, v4, v5, v9, v10
sha256_4way_extend_doubleround 30, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 32, r4, v8, v9, v6, v7
sha256_4way_extend_doubleround 34, r4, v10, v4, v8, v9
sha256_4way_extend_doubleround 36, r4, v5, v6, v10, v4
sha256_4way_extend_doubleround 38, r4, v7, v8, v5, v6
sha256_4way_extend_doubleround 40, r4, v9, v10, v7, v8
sha256_4way_extend_doubleround 42, r4, v4, v5, v9, v10
sha256_4way_extend_doubleround 44, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 46, r4, v8, v9, v6, v7
/* Rewind r4 to the start of the buffer */
addi r4, r4, -48*16
/*
 * Load the eight vectors at r6 into the register roles expected by round 3
 * (the fourth round of a quadround): a=v5 b=v6 c=v7 d=v4 e=v9 f=v10 g=v11
 * h=v8. This must happen before the rounds, which clobber r6.
 */
lvx v4, 0, r6
lvx v9, r6, r7
lvx v10, r6, r8
lvx v11, r6, r9
addi r12, r6, 4*16
lvx v8, 0, r12
lvx v5, r12, r7
lvx v6, r12, r8
lvx v7, r12, r9
#ifdef _AIX
ld r12, T.sha256_4k(r2)
#else
lis r12, HI(sha256_4k)
addi r12, r12, LO(sha256_4k)
#endif
/* First compression: rounds 3..63 */
sha256_4way_main_setup
sha256_4way_main_round 3, r12, r4, v5, v6, v7, v4, v9, v10, v11, v8
sha256_4way_main_quadround 4, r12, r4
sha256_4way_main_quadround 8, r12, r4
sha256_4way_main_quadround 12, r12, r4
sha256_4way_main_quadround 16, r12, r4
sha256_4way_main_quadround 20, r12, r4
sha256_4way_main_quadround 24, r12, r4
sha256_4way_main_quadround 28, r12, r4
sha256_4way_main_quadround 32, r12, r4
sha256_4way_main_quadround 36, r12, r4
sha256_4way_main_quadround 40, r12, r4
sha256_4way_main_quadround 44, r12, r4
sha256_4way_main_quadround 48, r12, r4
sha256_4way_main_quadround 52, r12, r4
sha256_4way_main_quadround 56, r12, r4
sha256_4way_main_quadround 60, r12, r4
/* Feed-forward: add the eight vectors at r5 to the working state */
lvx v12, 0, r5
lvx v13, r5, r7
lvx v14, r5, r8
lvx v15, r5, r9
addi r12, r5, 4*16
lvx v16, 0, r12
lvx v17, r12, r7
lvx v18, r12, r8
lvx v19, r12, r9
vadduwm v4, v4, v12
vadduwm v5, v5, v13
vadduwm v6, v6, v14
vadduwm v7, v7, v15
vadduwm v8, v8, v16
vadduwm v9, v9, v17
vadduwm v10, v10, v18
vadduwm v11, v11, v19
/* The first hash becomes message words 0..7 of the second block (frame) */
addi r12, r1, 4*4
stvx v4, 0, r12
stvx v5, r12, r7
stvx v6, r12, r8
stvx v7, r12, r9
addi r12, r12, 4*16
stvx v8, 0, r12
stvx v9, r12, r7
stvx v10, r12, r8
stvx v11, r12, r9
/*
 * Restore the eight saved vectors to slots 18, 19, 20, 22, 23, 24, 30 and
 * 31 of the buffer at r4, undoing the in-place modifications made above.
 */
addi r12, r1, 4*4+18*16
lvx v4, 0, r12
lvx v5, r12, r7
lvx v6, r12, r8
lvx v7, r12, r9
addi r12, r12, 4*16
lvx v8, 0, r12
lvx v9, r12, r7
lvx v10, r12, r8
lvx v11, r12, r9
addi r12, r4, 18*16
stvx v4, 0, r12
stvx v5, r12, r7
stvx v6, r12, r8
addi r12, r4, 22*16
stvx v7, 0, r12
stvx v8, r12, r7
stvx v9, r12, r8
addi r12, r4, 30*16
stvx v10, 0, r12
stvx v11, r12, r7
/*
 * Second hash. r4 now points at the schedule in the frame. extend_setup
 * reloads v16-v19, which main_setup and the feed-forward overwrote.
 */
addi r4, r1, 4*4
sha256_4way_extend_setup
#ifdef _AIX
ld r12, T.sha256d_4preext2(r2)
#else
lis r12, HI(sha256d_4preext2)
addi r12, r12, LO(sha256d_4preext2)
#endif
/*
 * Padding of the 32-byte message: W[8] = 0x80000000, W[9..14] = 0 and
 * W[15] = 0x00000100 (vspltisw 1 shifted left by one byte).
 */
lvx v2, 0, r12
vxor v9, v9, v9
vspltisw v3, 1
lvx v4, r12, r8
vsldoi v3, v3, v3, 1
addi r5, r1, 4*4+8*16
stvx v4, 0, r5
stvx v9, r5, r7
stvx v9, r5, r8
stvx v9, r5, r9
addi r5, r5, 4*16
stvx v9, 0, r5
stvx v9, r5, r7
stvx v9, r5, r8
stvx v3, r5, r9
/*
 * Schedule words 16..31 of the second block, hand-scheduled with the
 * constants from sha256d_4preext2 (v2) and v3 added in place of the
 * generic schedule terms that come from the fixed padding words.
 */
lvx v4, 0, r4
lvx v14, r4, r7
lvx v11, r4, r8
vrlw v12, v14, v1
vrlw v13, v14, v18
vxor v12, v12, v13
vsrw v13, v14, v16
vadduwm v5, v14, v2
vxor v12, v12, v13
vrlw v14, v11, v1
vrlw v13, v11, v18
vadduwm v4, v4, v12
vxor v14, v14, v13
vsrw v13, v11, v16
stvx v4, r4, r10
vxor v14, v14, v13
vrlw v12, v4, v17
vrlw v13, v4, v19
vadduwm v5, v5, v14
stvx v5, r4, r11
addi r4, r4, 2*16
lvx v14, r4, r7
vxor v12, v12, v13
vsrw v13, v4, v0
vrlw v6, v14, v1
vxor v12, v12, v13
vrlw v13, v14, v18
vxor v6, v6, v13
vsrw v13, v14, v16
vadduwm v11, v11, v12
vxor v6, v6, v13
vrlw v12, v5, v17
vrlw v13, v5, v19
vadduwm v6, v6, v11
lvx v11, r4, r8
stvx v6, r4, r10
vxor v12, v12, v13
vsrw v13, v5, v0
vrlw v7, v11, v1
vxor v12, v12, v13
vrlw v13, v11, v18
vxor v7, v7, v13
vsrw v13, v11, v16
vadduwm v14, v14, v12
vxor v7, v7, v13
vrlw v12, v6, v17
vrlw v13, v6, v19
vadduwm v7, v7, v14
stvx v7, r4, r11
addi r4, r4, 2*16
lvx v14, r4, r7
vxor v12, v12, v13
vsrw v13, v6, v0
vrlw v8, v14, v1
vxor v12, v12, v13
vrlw v13, v14, v18
vxor v8, v8, v13
vsrw v13, v14, v16
vadduwm v11, v11, v12
vxor v8, v8, v13
vrlw v12, v7, v17
vrlw v13, v7, v19
vadduwm v8, v8, v11
lvx v11, r4, r8
stvx v8, r4, r10
vxor v12, v12, v13
vsrw v13, v7, v0
vrlw v9, v11, v1
vxor v12, v12, v13
vrlw v13, v11, v18
vxor v9, v9, v13
vsrw v13, v11, v16
vadduwm v14, v14, v12
vxor v9, v9, v13
vrlw v12, v8, v17
vrlw v13, v8, v19
vadduwm v9, v9, v14
stvx v9, r4, r11
addi r4, r4, 2*16
lvx v14, r4, r7
vxor v12, v12, v13
vsrw v13, v8, v0
vrlw v10, v14, v1
vxor v12, v12, v13
vrlw v13, v14, v18
vxor v10, v10, v13
vsrw v13, v14, v16
vadduwm v11, v11, v12
vxor v10, v10, v13
vrlw v12, v9, v17
vrlw v13, v9, v19
vadduwm v11, v11, v3
vadduwm v14, v14, v4
vadduwm v10, v10, v11
lvx v2, r12, r7
vxor v12, v12, v13
vsrw v13, v9, v0
stvx v10, r4, r10
vxor v12, v12, v13
vadduwm v14, v14, v12
vrlw v12, v10, v17
vrlw v13, v10, v19
vadduwm v4, v14, v2
lvx v2, r12, r8
vxor v12, v12, v13
vsrw v13, v10, v0
stvx v4, r4, r11
vadduwm v5, v5, v2
vxor v12, v12, v13
vadduwm v5, v5, v12
vrlw v12, v4, v17
vrlw v13, v4, v19
addi r4, r4, 2*16
stvx v5, r4, r10
vxor v12, v12, v13
vsrw v13, v4, v0
vrlw v11, v5, v17
vxor v12, v12, v13
vrlw v13, v5, v19
vxor v11, v11, v13
vsrw v13, v5, v0
vadduwm v6, v6, v12
vxor v11, v11, v13
stvx v6, r4, r11
vadduwm v7, v7, v11
vrlw v12, v6, v17
vrlw v13, v6, v19
addi r4, r4, 2*16
stvx v7, r4, r10
vxor v12, v12, v13
vsrw v13, v6, v0
vrlw v11, v7, v17
vxor v12, v12, v13
vrlw v13, v7, v19
vxor v11, v11, v13
vsrw v13, v7, v0
vadduwm v8, v8, v12
vxor v11, v11, v13
stvx v8, r4, r11
vadduwm v9, v9, v11
lvx v2, r12, r9
vrlw v14, v8, v17
vrlw v13, v8, v19
vrlw v12, v9, v17
addi r4, r4, 2*16
stvx v9, r4, r10
vxor v14, v14, v13
vrlw v13, v9, v19
vxor v12, v12, v13
vsrw v13, v8, v0
vxor v14, v14, v13
vsrw v13, v9, v0
vxor v12, v12, v13
vadduwm v4, v4, v2
vadduwm v10, v10, v14
vadduwm v4, v4, v12
stvx v10, r4, r11
addi r4, r4, 2*16
lvx v11, r4, r8
vadduwm v5, v5, v3
stvx v4, r4, r10
vrlw v14, v11, v1
vrlw v13, v11, v18
vrlw v12, v10, v17
vxor v14, v14, v13
vrlw v13, v10, v19
vxor v12, v12, v13
vsrw v13, v11, v16
vxor v14, v14, v13
vsrw v13, v10, v0
vxor v12, v12, v13
vadduwm v5, v5, v14
vadduwm v5, v5, v12
stvx v5, r4, r11
addi r4, r4, 2*16
/* r4 points at slot 16, v11 = W[16]: generic extension for W[32..59] */
sha256_4way_extend_doubleround 16, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 18, r4, v8, v9, v6, v7
sha256_4way_extend_doubleround 20, r4, v10, v4, v8, v9
sha256_4way_extend_doubleround 22, r4, v5, v6, v10, v4
sha256_4way_extend_doubleround 24, r4, v7, v8, v5, v6
sha256_4way_extend_doubleround 26, r4, v9, v10, v7, v8
sha256_4way_extend_doubleround 28, r4, v4, v5, v9, v10
sha256_4way_extend_doubleround 30, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 32, r4, v8, v9, v6, v7
sha256_4way_extend_doubleround 34, r4, v10, v4, v8, v9
sha256_4way_extend_doubleround 36, r4, v5, v6, v10, v4
sha256_4way_extend_doubleround 38, r4, v7, v8, v5, v6
sha256_4way_extend_doubleround 40, r4, v9, v10, v7, v8
sha256_4way_extend_doubleround 42, r4, v4, v5, v9, v10
/*
 * Single extension step for W[60] only (r4 points at slot 44); W[61..63]
 * are never needed because the second compression stops after round 60.
 */
lvx v14, r4, r7
vrlw v12, v4, v17
vrlw v13, v4, v19
vadduwm v15, v11, v6
vrlw v6, v14, v1
vrlw v11, v14, v18
vxor v12, v12, v13
vxor v6, v6, v11
vsrw v13, v4, v0
vsrw v14, v14, v16
vxor v12, v12, v13
vxor v6, v6, v14
vadduwm v12, v12, v15
vadduwm v6, v6, v12
stvx v6, r4, r10
/* Rewind r4 to W[0] and start from the standard initial hash values */
addi r4, r4, -44*16
#ifdef _AIX
ld r5, T.sha256_4h(r2)
#else
lis r5, HI(sha256_4h)
addi r5, r5, LO(sha256_4h)
#endif
lvx v4, 0, r5
lvx v5, r5, r7
lvx v6, r5, r8
lvx v7, r5, r9
addi r12, r5, 4*16
lvx v8, 0, r12
lvx v9, r12, r7
lvx v10, r12, r8
lvx v11, r12, r9
#ifdef _AIX
ld r12, T.sha256_4k(r2)
#else
lis r12, HI(sha256_4k)
addi r12, r12, LO(sha256_4k)
#endif
/* Second compression: full rounds 0..56 */
sha256_4way_main_setup
sha256_4way_main_quadround 0, r12, r4
sha256_4way_main_quadround 4, r12, r4
sha256_4way_main_quadround 8, r12, r4
sha256_4way_main_quadround 12, r12, r4
sha256_4way_main_quadround 16, r12, r4
sha256_4way_main_quadround 20, r12, r4
sha256_4way_main_quadround 24, r12, r4
sha256_4way_main_quadround 28, r12, r4
sha256_4way_main_quadround 32, r12, r4
sha256_4way_main_quadround 36, r12, r4
sha256_4way_main_quadround 40, r12, r4
sha256_4way_main_quadround 44, r12, r4
sha256_4way_main_quadround 48, r12, r4
sha256_4way_main_quadround 52, r12, r4
sha256_4way_main_round 56, r12, r4, v4, v5, v6, v7, v8, v9, v10, v11
/*
 * Reduced round: only the "e" half of a round is computed.
 *   \vh = \vh + \vd + Ch(\ve, \vf, \vg) + Sigma1(\ve) + W[\i] + K[\i]
 * W[\i] is read from \rw + \i*16 and K[\i] from \rk + \i*16.
 * Requires v3, v16 from sha256_4way_main_setup and v19 = 13.
 * Clobbers r6 and v12-v15.
 */
.macro sha256_4way_main_round_red i, rk, rw, vd, ve, vf, vg, vh
li r6, (\i)*16
vand v15, \vf, \ve
vandc v14, \vg, \ve
lvx v12, \rw, r6
vadduwm \vh, \vh, \vd
vor v14, v14, v15
lvx v15, \rk, r6
vrlw v13, \ve, v3
vadduwm \vh, \vh, v14
vxor v14, \ve, v13
vrlw v13, \ve, v19
vadduwm \vh, \vh, v12
vxor v14, v14, v13
vadduwm \vh, \vh, v15
vrlw v13, v14, v16
vadduwm \vh, \vh, v13
.endm
/* Rounds 57..60: only the chain of "e" values is carried forward */
sha256_4way_main_round_red 57, r12, r4, v6, v11, v8, v9, v10
sha256_4way_main_round_red 58, r12, r4, v5, v10, v11, v8, v9
sha256_4way_main_round_red 59, r12, r4, v4, v9, v10, v11, v8
sha256_4way_main_round_red 60, r12, r4, v7, v8, v9, v10, v11
/* Add initial hash word 7 (r5 = sha256_4h) and store output vector 7 */
li r12, 7*16
lvx v19, r5, r12
vadduwm v11, v11, v19
stvx v11, r3, r12
/* Pop the frame through the back chain and restore VRSAVE */
ld r1, 0(r1)
mtspr 256, r0
blr
/*
 * sha256_use_4way
 * Returns 1 in r3 unconditionally. This definition is only assembled when
 * __ALTIVEC__ is defined.
 */
.align 2
.globl sha256_use_4way
.globl _sha256_use_4way
.globl .sha256_use_4way
#ifdef __ELF__
.type sha256_use_4way, %function
#endif
sha256_use_4way:
_sha256_use_4way:
.sha256_use_4way:
li r3, 1
blr
#endif /* __ALTIVEC__ */
#endif
0707010000002A000081A4000003E800000064000000015EF4BCA10001A5DF000000000000000000000000000000000000001A00000000cpuminer-2.5.1/sha2-x64.S/*
* Copyright 2012-2015 pooler@litecoinpool.org
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#if defined(USE_ASM) && defined(__x86_64__)
/* SHA-256 initial hash values H0..H7, 16-byte aligned. */
.data
.p2align 4
sha256_h:
.long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a
.long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
/*
 * SHA-256 round constants K[0..63], 64-byte aligned, four per row so that
 * one row can be loaded into an XMM register with movdqa.
 */
.data
.p2align 6
sha256_k:
.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
 * 16-byte constant that directly follows sha256_k (256 bytes, 64-byte
 * aligned) and is therefore itself 16-byte aligned. In memory the bytes
 * read 03 02 01 00, 07 06 05 04, ... -- presumably a byte-shuffle control
 * that reverses the bytes of each 32-bit word; no use is visible in this
 * part of the file, so confirm at the point of use.
 */
bswap_xmm_mask:
.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
/*
 * Four scalar SHA-256 rounds interleaved with the SSE2 computation of the
 * next four message-schedule words.
 *   \ra..\rh  32-bit registers holding a..h on entry
 *   \x0..\x3  XMM registers holding sixteen consecutive schedule words
 *             W[i..i+15] (four per register)
 * The four W+K values for the rounds are read from (%rsp), 1*4(%rsp),
 * 2*4(%rsp) and 3*4(%rsp); the caller stores them there beforehand.
 * Each round writes the new "e" into the register that held d and the new
 * "a" into the register that held h, in the order (\rd,\rh), (\rc,\rg),
 * (\rb,\rf), (\ra,\re); no values are moved between registers.
 * On exit \x0 holds W[i+16..i+19]; \x1..\x3 are unchanged.
 * Clobbers %eax, %ebx, %ecx and %xmm4-%xmm8.
 */
.macro sha256_mixed_quadround ra, rb, rc, rd, re, rf, rg, rh, x0, x1, x2, x3
/* Round 1 (scalar); xmm4 = W[i+9..i+12] + W[i..i+3], xmm5 = W[i+1..i+4] */
movdqa \x3, %xmm4
movl \re, %eax
movdqa \x2, %xmm6
rorl $(25-11), %eax
movl \ra, %ebx
pslldq $12, %xmm4
rorl $(22-13), %ebx
psrldq $4, %xmm6
xorl \re, %eax
movl \rf, %ecx
rorl $(11-6), %eax
pxor %xmm6, %xmm4
movdqa \x1, %xmm5
xorl \ra, %ebx
xorl \rg, %ecx
xorl \re, %eax
paddd \x0, %xmm4
movdqa \x0, %xmm7
andl \re, %ecx
rorl $(13-2), %ebx
xorl \ra, %ebx
pslldq $12, %xmm5
psrldq $4, %xmm7
rorl $6, %eax
xorl \rg, %ecx
pxor %xmm7, %xmm5
rorl $2, %ebx
addl %eax, %ecx
addl (%rsp) , %ecx
movdqa %xmm5, %xmm6
movl \ra, %eax
addl %ecx, \rh
movl \ra, %ecx
movdqa %xmm5, %xmm7
orl \rc, %eax
addl \rh, \rd
andl \rc, %ecx
pslld $(32-7), %xmm5
psrld $7, %xmm6
andl \rb, %eax
addl %ebx, \rh
orl %ecx, %eax
por %xmm6, %xmm5
addl %eax, \rh
/* Round 2 (scalar); xmm5 accumulates sigma0(W[i+1..i+4]) */
movl \rd, %eax
movdqa %xmm7, %xmm6
movl \rh, %ebx
rorl $(25-11), %eax
xorl \rd, %eax
movdqa %xmm7, %xmm8
movl \re, %ecx
rorl $(22-13), %ebx
xorl \rh, %ebx
pslld $(32-18), %xmm7
rorl $(11-6), %eax
xorl \rf, %ecx
rorl $(13-2), %ebx
psrld $18, %xmm6
xorl \rd, %eax
andl \rd, %ecx
rorl $6, %eax
pxor %xmm7, %xmm5
xorl \rh, %ebx
xorl \rf, %ecx
psrld $3, %xmm8
addl %eax, %ecx
addl 1*4(%rsp), %ecx
rorl $2, %ebx
pxor %xmm6, %xmm5
movl \rh, %eax
addl %ecx, \rg
movl \rh, %ecx
pxor %xmm8, %xmm5
orl \rb, %eax
addl \rg, \rc
andl \rb, %ecx
pshufd $0xfa, \x3, %xmm6
andl \ra, %eax
addl %ebx, \rg
paddd %xmm5, %xmm4
orl %ecx, %eax
addl %eax, \rg
/* Round 3 (scalar); sigma1 of W[i+14], W[i+15] gives the first two words */
movl \rc, %eax
movdqa %xmm6, %xmm7
movl \rg, %ebx
rorl $(25-11), %eax
xorl \rc, %eax
movdqa %xmm6, %xmm8
rorl $(22-13), %ebx
movl \rd, %ecx
xorl \rg, %ebx
psrlq $17, %xmm6
psrlq $19, %xmm7
rorl $(11-6), %eax
xorl \re, %ecx
xorl \rc, %eax
psrld $10, %xmm8
pxor %xmm7, %xmm6
andl \rc, %ecx
rorl $(13-2), %ebx
xorl \rg, %ebx
pxor %xmm6, %xmm8
xorl \re, %ecx
rorl $6, %eax
addl %eax, %ecx
pshufd $0x8f, %xmm8, %xmm8
rorl $2, %ebx
addl 2*4(%rsp), %ecx
movl \rg, %eax
psrldq $8, %xmm8
addl %ecx, \rf
movl \rg, %ecx
orl \ra, %eax
paddd %xmm8, %xmm4
addl \rf, \rb
andl \ra, %ecx
andl \rh, %eax
pshufd $0x50, %xmm4, %xmm6
addl %ebx, \rf
orl %ecx, %eax
addl %eax, \rf
/* Round 4 (scalar); sigma1 of the two new words gives the last two words */
movdqa %xmm6, %xmm7
movl \rb, %eax
rorl $(25-11), %eax
movl \rf, %ebx
movdqa %xmm6, \x0
rorl $(22-13), %ebx
xorl \rb, %eax
movl \rc, %ecx
psrlq $17, %xmm6
rorl $(11-6), %eax
xorl \rf, %ebx
xorl \rd, %ecx
psrlq $19, %xmm7
xorl \rb, %eax
andl \rb, %ecx
rorl $(13-2), %ebx
psrld $10, \x0
xorl \rf, %ebx
rorl $6, %eax
pxor %xmm7, %xmm6
xorl \rd, %ecx
rorl $2, %ebx
addl %eax, %ecx
pxor %xmm6, \x0
addl 3*4(%rsp), %ecx
movl \rf, %eax
addl %ecx, \re
pshufd $0xf8, \x0, \x0
movl \rf, %ecx
orl \rh, %eax
addl \re, \ra
pslldq $8, \x0
andl \rh, %ecx
andl \rg, %eax
paddd %xmm4, \x0
addl %ebx, \re
orl %ecx, %eax
addl %eax, \re
.endm
/*
 * One scalar SHA-256 round without any message-schedule work.
 *   \i        index of the 32-bit W+K value, read from \i*4(%rsp)
 *   \ra..\rh  32-bit registers holding a..h
 * With T1 = h + Ch(e,f,g) + Sigma1(e) + (W+K):
 *   \rd receives d + T1                      (the new e)
 *   \rh receives T1 + Sigma0(a) + Maj(a,b,c) (the new a)
 * The other six registers are unchanged. Clobbers %eax, %ebx and %ecx.
 */
.macro sha256_main_round i, ra, rb, rc, rd, re, rf, rg, rh
/* eax: Sigma1(e) built as nested rotates; ebx: Sigma0(a) likewise */
movl \re, %eax
rorl $(25-11), %eax
movl \ra, %ebx
xorl \re, %eax
rorl $(22-13), %ebx
movl \rf, %ecx
xorl \ra, %ebx
rorl $(11-6), %eax
/* ecx: Ch(e, f, g) = ((f ^ g) & e) ^ g */
xorl \rg, %ecx
xorl \re, %eax
rorl $(13-2), %ebx
andl \re, %ecx
xorl \ra, %ebx
rorl $6, %eax
xorl \rg, %ecx
addl %eax, %ecx
rorl $2, %ebx
addl \i*4(%rsp), %ecx
/* eax: Maj(a, b, c) = ((a | c) & b) | (a & c) */
movl \ra, %eax
addl %ecx, \rh
movl \ra, %ecx
orl \rc, %eax
addl \rh, \rd
andl \rc, %ecx
andl \rb, %eax
addl %ebx, \rh
orl %ecx, %eax
addl %eax, \rh
.endm
/*
 * sha256_transform_sse2(state, block, swap)
 *
 * Scalar/SSE2 SHA-256 compression of one 64-byte block.
 *   rdi = state: eight 32-bit words, loaded into r8d..r15d and updated in
 *         place by adding the final working variables
 *   rsi = block: 64 bytes, read with unaligned loads
 *   rdx = swap:  non-zero selects a byte swap of every 32-bit input word
 * In the _WIN64/__CYGWIN__ build the arguments arrive in rcx, rdx, r8 and
 * are moved to rdi, rsi, rdx; rdi, rsi and xmm6-xmm9 are saved and
 * restored around the body.
 *
 * (%rsp) is a 16-byte scratch slot holding four "message word + constant"
 * sums that the round macros read as 32-bit words.
 */
.text
.p2align 6
sha256_transform_sse2:
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
#if defined(_WIN64) || defined(__CYGWIN__)
pushq %rdi
pushq %rsi
/* slot 0 is the round scratch area, slots 1-4 hold the saved xmm6-xmm9 */
subq $5*16, %rsp
movdqa %xmm6, 1*16(%rsp)
movdqa %xmm7, 2*16(%rsp)
movdqa %xmm8, 3*16(%rsp)
movdqa %xmm9, 4*16(%rsp)
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
#else
subq $16, %rsp
#endif
/* working variables a..h = state[0..7] */
movl 0*4(%rdi), %r8d
movl 1*4(%rdi), %r9d
movl 2*4(%rdi), %r10d
movl 3*4(%rdi), %r11d
movl 4*4(%rdi), %r12d
movl 5*4(%rdi), %r13d
movl 6*4(%rdi), %r14d
movl 7*4(%rdi), %r15d
testq %rdx, %rdx
jnz sha256_transform_sse2_swap
/* swap == 0: use the sixteen input words as they are */
movdqu 0*16(%rsi), %xmm0
movdqu 1*16(%rsi), %xmm1
movdqu 2*16(%rsi), %xmm2
movdqu 3*16(%rsi), %xmm3
jmp sha256_transform_sse2_core
sha256_transform_sse2_swap:
movdqu 0*16(%rsi), %xmm0
movdqu 1*16(%rsi), %xmm1
movdqu 2*16(%rsi), %xmm2
movdqu 3*16(%rsi), %xmm3
/* 0xb1 exchanges the two 16-bit halves of every 32-bit word ... */
pshuflw $0xb1, %xmm0, %xmm0
pshuflw $0xb1, %xmm1, %xmm1
pshuflw $0xb1, %xmm2, %xmm2
pshuflw $0xb1, %xmm3, %xmm3
pshufhw $0xb1, %xmm0, %xmm0
pshufhw $0xb1, %xmm1, %xmm1
pshufhw $0xb1, %xmm2, %xmm2
pshufhw $0xb1, %xmm3, %xmm3
/* ... and the shift pair exchanges the bytes inside each 16-bit half,
 * which together reverses the byte order of each 32-bit word */
movdqa %xmm0, %xmm4
movdqa %xmm1, %xmm5
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
psrlw $8, %xmm4
psrlw $8, %xmm5
psrlw $8, %xmm6
psrlw $8, %xmm7
psllw $8, %xmm0
psllw $8, %xmm1
psllw $8, %xmm2
psllw $8, %xmm3
pxor %xmm4, %xmm0
pxor %xmm5, %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
sha256_transform_sse2_core:
/* rdx walks the sha256_k table; rsi is reused as a countdown of rounds */
leaq sha256_k(%rip), %rdx
movq $48, %rsi
.p2align 4
/*
 * Three passes of 16 rounds each. Every group of four rounds first stores
 * table entry + message words to the scratch slot, then runs
 * sha256_mixed_quadround (defined earlier in the file), whose register
 * arguments alternate so that no state has to be moved between rounds.
 */
sha256_transform_sse2_loop:
movdqa 0*16(%rdx), %xmm9
paddd %xmm0, %xmm9
movdqa %xmm9, (%rsp)
sha256_mixed_quadround %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d, %xmm0, %xmm1, %xmm2, %xmm3
movdqa 1*16(%rdx), %xmm9
paddd %xmm1, %xmm9
movdqa %xmm9, (%rsp)
sha256_mixed_quadround %r12d, %r13d, %r14d, %r15d, %r8d, %r9d, %r10d, %r11d, %xmm1, %xmm2, %xmm3, %xmm0
movdqa 2*16(%rdx), %xmm9
paddd %xmm2, %xmm9
movdqa %xmm9, (%rsp)
sha256_mixed_quadround %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d, %xmm2, %xmm3, %xmm0, %xmm1
movdqa 3*16(%rdx), %xmm9
paddd %xmm3, %xmm9
movdqa %xmm9, (%rsp)
addq $4*16, %rdx
sha256_mixed_quadround %r12d, %r13d, %r14d, %r15d, %r8d, %r9d, %r10d, %r11d, %xmm3, %xmm0, %xmm1, %xmm2
subq $16, %rsi
jne sha256_transform_sse2_loop
/* Last 16 rounds: plain rounds only, using the words left in xmm0-xmm3 */
paddd 0*16(%rdx), %xmm0
movdqa %xmm0, (%rsp)
sha256_main_round 0, %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d
sha256_main_round 1, %r15d, %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d
sha256_main_round 2, %r14d, %r15d, %r8d, %r9d, %r10d, %r11d, %r12d, %r13d
sha256_main_round 3, %r13d, %r14d, %r15d, %r8d, %r9d, %r10d, %r11d, %r12d
paddd 1*16(%rdx), %xmm1
movdqa %xmm1, (%rsp)
sha256_main_round 0, %r12d, %r13d, %r14d, %r15d, %r8d, %r9d, %r10d, %r11d
sha256_main_round 1, %r11d, %r12d, %r13d, %r14d, %r15d, %r8d, %r9d, %r10d
sha256_main_round 2, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d, %r8d, %r9d
sha256_main_round 3, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d, %r8d
paddd 2*16(%rdx), %xmm2
movdqa %xmm2, (%rsp)
sha256_main_round 0, %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d
sha256_main_round 1, %r15d, %r8d, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d
sha256_main_round 2, %r14d, %r15d, %r8d, %r9d, %r10d, %r11d, %r12d, %r13d
sha256_main_round 3, %r13d, %r14d, %r15d, %r8d, %r9d, %r10d, %r11d, %r12d
paddd 3*16(%rdx), %xmm3
movdqa %xmm3, (%rsp)
sha256_main_round 0, %r12d, %r13d, %r14d, %r15d, %r8d, %r9d, %r10d, %r11d
sha256_main_round 1, %r11d, %r12d, %r13d, %r14d, %r15d, %r8d, %r9d, %r10d
sha256_main_round 2, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d, %r8d, %r9d
sha256_main_round 3, %r9d, %r10d, %r11d, %r12d, %r13d, %r14d, %r15d, %r8d
/* state[k] += working variable k */
addl %r8d, 0*4(%rdi)
addl %r9d, 1*4(%rdi)
addl %r10d, 2*4(%rdi)
addl %r11d, 3*4(%rdi)
addl %r12d, 4*4(%rdi)
addl %r13d, 5*4(%rdi)
addl %r14d, 6*4(%rdi)
addl %r15d, 7*4(%rdi)
#if defined(_WIN64) || defined(__CYGWIN__)
movdqa 1*16(%rsp), %xmm6
movdqa 2*16(%rsp), %xmm7
movdqa 3*16(%rsp), %xmm8
movdqa 4*16(%rsp), %xmm9
addq $5*16, %rsp
popq %rsi
popq %rdi
#else
addq $16, %rsp
#endif
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
ret
/*
 * sha256_transform_phe(state, block, swap)
 *
 * Variant of the SHA-256 block transform that hands the work to the
 * instruction encoded by the raw bytes f3 0f a6 d0 (labelled "rep xsha256"
 * below; presumably the VIA PadLock hash engine - confirm against the
 * PadLock programming guide).
 *   rdi = state, passed through untouched to the instruction
 *   rsi = block: 64 bytes, copied to a 64-byte aligned stack buffer
 *   rdx = swap:  note the sense - the copy is byte-swapped per 32-bit word
 *         when swap is ZERO and made verbatim when swap is non-zero
 * In the _WIN64/__CYGWIN__ build the arguments are moved from rcx, rdx, r8
 * and rdi/rsi are preserved.
 */
.text
.p2align 6
sha256_transform_phe:
#if defined(_WIN64) || defined(__CYGWIN__)
pushq %rdi
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
#endif
/* r8 remembers the caller's stack pointer across the realignment */
movq %rsp, %r8
subq $64, %rsp
andq $-64, %rsp
testq %rdx, %rdx
jnz sha256_transform_phe_noswap
/* swap == 0: first eight words are reversed with bswap ... */
movl 0*4(%rsi), %eax
movl 1*4(%rsi), %ecx
movl 2*4(%rsi), %edx
movl 3*4(%rsi), %r9d
bswapl %eax
bswapl %ecx
bswapl %edx
bswapl %r9d
movl %eax, 0*4(%rsp)
movl %ecx, 1*4(%rsp)
movl %edx, 2*4(%rsp)
movl %r9d, 3*4(%rsp)
movl 4*4(%rsi), %eax
movl 5*4(%rsi), %ecx
movl 6*4(%rsi), %edx
movl 7*4(%rsi), %r9d
bswapl %eax
bswapl %ecx
bswapl %edx
bswapl %r9d
movl %eax, 4*4(%rsp)
movl %ecx, 5*4(%rsp)
movl %edx, 6*4(%rsp)
movl %r9d, 7*4(%rsp)
/* ... and the last eight with the SSE2 shuffle + shift byte reversal */
movdqu 2*16(%rsi), %xmm0
movdqu 3*16(%rsi), %xmm2
pshuflw $0xb1, %xmm0, %xmm0
pshuflw $0xb1, %xmm2, %xmm2
pshufhw $0xb1, %xmm0, %xmm0
pshufhw $0xb1, %xmm2, %xmm2
movdqa %xmm0, %xmm1
movdqa %xmm2, %xmm3
psrlw $8, %xmm1
psrlw $8, %xmm3
psllw $8, %xmm0
psllw $8, %xmm2
pxor %xmm1, %xmm0
pxor %xmm3, %xmm2
movdqa %xmm0, 2*16(%rsp)
movdqa %xmm2, 3*16(%rsp)
jmp sha256_transform_phe_core
sha256_transform_phe_noswap:
/* swap != 0: plain 64-byte copy into the aligned buffer */
movdqu 0*16(%rsi), %xmm0
movdqu 1*16(%rsi), %xmm1
movdqu 2*16(%rsi), %xmm2
movdqu 3*16(%rsi), %xmm3
movdqa %xmm0, 0*16(%rsp)
movdqa %xmm1, 1*16(%rsp)
movdqa %xmm2, 2*16(%rsp)
movdqa %xmm3, 3*16(%rsp)
sha256_transform_phe_core:
/*
 * rsi = aligned buffer, rdi = state.
 * NOTE(review): rax = -1 with rcx = 1 presumably requests one raw 64-byte
 * block without padding - confirm against the hardware documentation.
 */
movq %rsp, %rsi
movq $-1, %rax
movq $1, %rcx
/* rep xsha256 */
.byte 0xf3, 0x0f, 0xa6, 0xd0
movq %r8, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rsi
popq %rdi
#endif
ret
/*
 * sha256_transform_addr: pointer to the active sha256_transform
 * implementation. It is initialised to sha256_transform_sse2 and lives in
 * .data, so it can be repointed at run time (the code that does so is not
 * in this part of the file).
 */
.data
.p2align 3
sha256_transform_addr:
.quad sha256_transform_sse2
/*
 * sha256_transform / _sha256_transform: exported entry point. It tail-jumps
 * through sha256_transform_addr, leaving all argument registers untouched
 * for the selected implementation.
 */
.text
.p2align 3
.globl sha256_transform
.globl _sha256_transform
sha256_transform:
_sha256_transform:
jmp *sha256_transform_addr(%rip)
/*
 * sha256d_ms / _sha256d_ms: double SHA-256 from a midstate, implemented
 * with the "rep xsha256" byte sequence used by sha256_transform_phe.
 *
 * Register use after the optional Win64 argument shuffle:
 *   rdi = hash:     32-byte output, also used as the working state
 *   rsi = data:     only the first four 32-bit words are read
 *   rdx = midstate: 32 bytes copied into hash before hashing
 * hash and midstate are accessed with movdqa and therefore have to be
 * 16-byte aligned. A 32-byte aligned stack buffer holds the input handed
 * to the instruction.
 *
 * NOTE(review): the rax/rcx pairs (64/80, then 0/32) look like
 * "bytes already hashed" / "total message length", with padding done by
 * the hardware - confirm against the hardware documentation.
 */
.text
.p2align 6
.globl sha256d_ms
.globl _sha256d_ms
sha256d_ms:
_sha256d_ms:
#if defined(_WIN64) || defined(__CYGWIN__)
pushq %rdi
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
#endif
/* r8 remembers the caller's stack pointer across the realignment */
movq %rsp, %r8
subq $32, %rsp
andq $-32, %rsp
/* hash = midstate */
movdqa 0*16(%rdx), %xmm0
movdqa 1*16(%rdx), %xmm1
movdqa %xmm0, 0*16(%rdi)
movdqa %xmm1, 1*16(%rdi)
/* byte-reverse the four data words into the stack buffer */
movl 0*4(%rsi), %eax
movl 1*4(%rsi), %ecx
movl 2*4(%rsi), %edx
movl 3*4(%rsi), %r9d
bswapl %eax
bswapl %ecx
bswapl %edx
bswapl %r9d
movl %eax, 0*4(%rsp)
movl %ecx, 1*4(%rsp)
movl %edx, 2*4(%rsp)
movl %r9d, 3*4(%rsp)
/* first hash: continue from the midstate over the buffered words */
movq %rsp, %rsi
movl $64, %eax
movl $80, %ecx
/* rep xsha256 */
.byte 0xf3, 0x0f, 0xa6, 0xd0
/* shuffle the 32-byte result through bswap_xmm_mask (defined elsewhere in
 * the file) and stage it on the stack as input of the second hash */
movdqa bswap_xmm_mask(%rip), %xmm1
movdqa 0*16(%rdi), %xmm0
movdqa 1*16(%rdi), %xmm2
pshufb %xmm1, %xmm0
pshufb %xmm1, %xmm2
movdqa %xmm0, 0*16(%rsp)
movdqa %xmm2, 1*16(%rsp)
/* reset hash to the sha256_h table (defined elsewhere in the file) */
movdqa sha256_h+0*16(%rip), %xmm0
movdqa sha256_h+1*16(%rip), %xmm1
movdqa %xmm0, 0*16(%rdi)
movdqa %xmm1, 1*16(%rdi)
/* second hash over the 32 staged bytes */
movq %rsp, %rsi
xorq %rax, %rax
movl $32, %ecx
/* rep xsha256 */
.byte 0xf3, 0x0f, 0xa6, 0xd0
movq %r8, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rsi
popq %rdi
#endif
ret
/*
 * sha256_4h: the eight SHA-256 initial hash words, each replicated into
 * the four 32-bit lanes of one 16-byte row (row k = H[k] x 4).
 * ".fill 4, 4, v" emits four 4-byte copies of v.
 */
.data
.p2align 7
sha256_4h:
.fill 4, 4, 0x6a09e667
.fill 4, 4, 0xbb67ae85
.fill 4, 4, 0x3c6ef372
.fill 4, 4, 0xa54ff53a
.fill 4, 4, 0x510e527f
.fill 4, 4, 0x9b05688c
.fill 4, 4, 0x1f83d9ab
.fill 4, 4, 0x5be0cd19
/*
 * sha256_4k: the 64 SHA-256 round constants, each replicated into the four
 * 32-bit lanes of one 16-byte row (row i = K[i] x 4).
 * ".fill 4, 4, v" emits four 4-byte copies of v.
 */
.data
.p2align 7
sha256_4k:
/* K[0..15] */
.fill 4, 4, 0x428a2f98
.fill 4, 4, 0x71374491
.fill 4, 4, 0xb5c0fbcf
.fill 4, 4, 0xe9b5dba5
.fill 4, 4, 0x3956c25b
.fill 4, 4, 0x59f111f1
.fill 4, 4, 0x923f82a4
.fill 4, 4, 0xab1c5ed5
.fill 4, 4, 0xd807aa98
.fill 4, 4, 0x12835b01
.fill 4, 4, 0x243185be
.fill 4, 4, 0x550c7dc3
.fill 4, 4, 0x72be5d74
.fill 4, 4, 0x80deb1fe
.fill 4, 4, 0x9bdc06a7
.fill 4, 4, 0xc19bf174
/* K[16..31] */
.fill 4, 4, 0xe49b69c1
.fill 4, 4, 0xefbe4786
.fill 4, 4, 0x0fc19dc6
.fill 4, 4, 0x240ca1cc
.fill 4, 4, 0x2de92c6f
.fill 4, 4, 0x4a7484aa
.fill 4, 4, 0x5cb0a9dc
.fill 4, 4, 0x76f988da
.fill 4, 4, 0x983e5152
.fill 4, 4, 0xa831c66d
.fill 4, 4, 0xb00327c8
.fill 4, 4, 0xbf597fc7
.fill 4, 4, 0xc6e00bf3
.fill 4, 4, 0xd5a79147
.fill 4, 4, 0x06ca6351
.fill 4, 4, 0x14292967
/* K[32..47] */
.fill 4, 4, 0x27b70a85
.fill 4, 4, 0x2e1b2138
.fill 4, 4, 0x4d2c6dfc
.fill 4, 4, 0x53380d13
.fill 4, 4, 0x650a7354
.fill 4, 4, 0x766a0abb
.fill 4, 4, 0x81c2c92e
.fill 4, 4, 0x92722c85
.fill 4, 4, 0xa2bfe8a1
.fill 4, 4, 0xa81a664b
.fill 4, 4, 0xc24b8b70
.fill 4, 4, 0xc76c51a3
.fill 4, 4, 0xd192e819
.fill 4, 4, 0xd6990624
.fill 4, 4, 0xf40e3585
.fill 4, 4, 0x106aa070
/* K[48..63] */
.fill 4, 4, 0x19a4c116
.fill 4, 4, 0x1e376c08
.fill 4, 4, 0x2748774c
.fill 4, 4, 0x34b0bcb5
.fill 4, 4, 0x391c0cb3
.fill 4, 4, 0x4ed8aa4a
.fill 4, 4, 0x5b9cca4f
.fill 4, 4, 0x682e6ff3
.fill 4, 4, 0x748f82ee
.fill 4, 4, 0x78a5636f
.fill 4, 4, 0x84c87814
.fill 4, 4, 0x8cc70208
.fill 4, 4, 0x90befffa
.fill 4, 4, 0xa4506ceb
.fill 4, 4, 0xbef9a3f7
.fill 4, 4, 0xc67178f2
/*
 * sha256d_4preext2_17/23/24/30: four 16-byte rows, each one 32-bit
 * constant replicated into four lanes. Their users are outside this part
 * of the file.
 * NOTE(review): judging by the names these are presumably precomputed
 * message-schedule terms for words 17, 23, 24 and 30 of the second hash
 * of sha256d - confirm against the code that references them.
 * ".fill 4, 4, v" emits four 4-byte copies of v.
 */
.data
.p2align 6
sha256d_4preext2_17:
.fill 4, 4, 0x00a00000
sha256d_4preext2_23:
.fill 4, 4, 0x11002000
sha256d_4preext2_24:
.fill 4, 4, 0x80000000
sha256d_4preext2_30:
.fill 4, 4, 0x00400022
#ifdef USE_AVX2
/*
 * sha256_8h: the eight SHA-256 initial hash words, each replicated into
 * the eight 32-bit lanes of one 32-byte row (row k = H[k] x 8).
 * ".fill 8, 4, v" emits eight 4-byte copies of v.
 */
.data
.p2align 7
sha256_8h:
.fill 8, 4, 0x6a09e667
.fill 8, 4, 0xbb67ae85
.fill 8, 4, 0x3c6ef372
.fill 8, 4, 0xa54ff53a
.fill 8, 4, 0x510e527f
.fill 8, 4, 0x9b05688c
.fill 8, 4, 0x1f83d9ab
.fill 8, 4, 0x5be0cd19
/*
 * sha256_8k: the 64 SHA-256 round constants, each replicated into the
 * eight 32-bit lanes of one 32-byte row (row i = K[i] x 8).
 * ".fill 8, 4, v" emits eight 4-byte copies of v.
 */
.data
.p2align 7
sha256_8k:
/* K[0..15] */
.fill 8, 4, 0x428a2f98
.fill 8, 4, 0x71374491
.fill 8, 4, 0xb5c0fbcf
.fill 8, 4, 0xe9b5dba5
.fill 8, 4, 0x3956c25b
.fill 8, 4, 0x59f111f1
.fill 8, 4, 0x923f82a4
.fill 8, 4, 0xab1c5ed5
.fill 8, 4, 0xd807aa98
.fill 8, 4, 0x12835b01
.fill 8, 4, 0x243185be
.fill 8, 4, 0x550c7dc3
.fill 8, 4, 0x72be5d74
.fill 8, 4, 0x80deb1fe
.fill 8, 4, 0x9bdc06a7
.fill 8, 4, 0xc19bf174
/* K[16..31] */
.fill 8, 4, 0xe49b69c1
.fill 8, 4, 0xefbe4786
.fill 8, 4, 0x0fc19dc6
.fill 8, 4, 0x240ca1cc
.fill 8, 4, 0x2de92c6f
.fill 8, 4, 0x4a7484aa
.fill 8, 4, 0x5cb0a9dc
.fill 8, 4, 0x76f988da
.fill 8, 4, 0x983e5152
.fill 8, 4, 0xa831c66d
.fill 8, 4, 0xb00327c8
.fill 8, 4, 0xbf597fc7
.fill 8, 4, 0xc6e00bf3
.fill 8, 4, 0xd5a79147
.fill 8, 4, 0x06ca6351
.fill 8, 4, 0x14292967
/* K[32..47] */
.fill 8, 4, 0x27b70a85
.fill 8, 4, 0x2e1b2138
.fill 8, 4, 0x4d2c6dfc
.fill 8, 4, 0x53380d13
.fill 8, 4, 0x650a7354
.fill 8, 4, 0x766a0abb
.fill 8, 4, 0x81c2c92e
.fill 8, 4, 0x92722c85
.fill 8, 4, 0xa2bfe8a1
.fill 8, 4, 0xa81a664b
.fill 8, 4, 0xc24b8b70
.fill 8, 4, 0xc76c51a3
.fill 8, 4, 0xd192e819
.fill 8, 4, 0xd6990624
.fill 8, 4, 0xf40e3585
.fill 8, 4, 0x106aa070
/* K[48..63] */
.fill 8, 4, 0x19a4c116
.fill 8, 4, 0x1e376c08
.fill 8, 4, 0x2748774c
.fill 8, 4, 0x34b0bcb5
.fill 8, 4, 0x391c0cb3
.fill 8, 4, 0x4ed8aa4a
.fill 8, 4, 0x5b9cca4f
.fill 8, 4, 0x682e6ff3
.fill 8, 4, 0x748f82ee
.fill 8, 4, 0x78a5636f
.fill 8, 4, 0x84c87814
.fill 8, 4, 0x8cc70208
.fill 8, 4, 0x90befffa
.fill 8, 4, 0xa4506ceb
.fill 8, 4, 0xbef9a3f7
.fill 8, 4, 0xc67178f2
/*
 * sha256d_8preext2_17/23/24/30: four 32-byte rows, each one 32-bit
 * constant replicated into eight lanes. Their users are outside this part
 * of the file.
 * NOTE(review): judging by the names these are presumably precomputed
 * message-schedule terms for words 17, 23, 24 and 30 of the second hash
 * of sha256d - confirm against the code that references them.
 * ".fill 8, 4, v" emits eight 4-byte copies of v.
 */
.data
.p2align 6
sha256d_8preext2_17:
.fill 8, 4, 0x00a00000
sha256d_8preext2_23:
.fill 8, 4, 0x11002000
sha256d_8preext2_24:
.fill 8, 4, 0x80000000
sha256d_8preext2_30:
.fill 8, 4, 0x00400022
#endif /* USE_AVX2 */
/*
 * sha256_init_4way / _sha256_init_4way
 *
 * Copies the 128-byte sha256_4h table (eight rows of four identical 32-bit
 * words) to the buffer passed as the first argument. The destination is
 * written with unaligned stores, so it needs no particular alignment.
 * Clobbers xmm0-xmm3.
 */
.text
.p2align 6
.globl sha256_init_4way
.globl _sha256_init_4way
sha256_init_4way:
_sha256_init_4way:
#if defined(_WIN64) || defined(__CYGWIN__)
/* this build receives the pointer in rcx; rdi is preserved for the caller */
pushq %rdi
movq %rcx, %rdi
#endif
/* rows 0-3 */
movdqa sha256_4h+0*16(%rip), %xmm0
movdqu %xmm0, 0*16(%rdi)
movdqa sha256_4h+1*16(%rip), %xmm1
movdqu %xmm1, 1*16(%rdi)
movdqa sha256_4h+2*16(%rip), %xmm2
movdqu %xmm2, 2*16(%rdi)
movdqa sha256_4h+3*16(%rip), %xmm3
movdqu %xmm3, 3*16(%rdi)
/* rows 4-7 */
movdqa sha256_4h+4*16(%rip), %xmm0
movdqu %xmm0, 4*16(%rdi)
movdqa sha256_4h+5*16(%rip), %xmm1
movdqu %xmm1, 5*16(%rdi)
movdqa sha256_4h+6*16(%rip), %xmm2
movdqu %xmm2, 6*16(%rdi)
movdqa sha256_4h+7*16(%rip), %xmm3
movdqu %xmm3, 7*16(%rdi)
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rdi
#endif
ret
#ifdef USE_AVX2
/*
 * sha256_init_8way / _sha256_init_8way
 *
 * Fills the 256-byte buffer passed as the first argument with eight
 * 32-byte rows; row k holds eight copies of the first 32-bit word of row k
 * of sha256_4h. The destination is written with unaligned stores.
 * Clobbers ymm0-ymm3.
 *
 * The upper halves of the YMM registers are cleared with vzeroupper before
 * returning, so that callers executing non-VEX SSE code do not pay the
 * AVX/SSE state-transition penalty.
 */
.text
.p2align 6
.globl sha256_init_8way
.globl _sha256_init_8way
sha256_init_8way:
_sha256_init_8way:
#if defined(_WIN64) || defined(__CYGWIN__)
/* this build receives the pointer in rcx; rdi is preserved for the caller */
pushq %rdi
movq %rcx, %rdi
#endif
/* rows 0-3: broadcast one word of each sha256_4h row to eight lanes */
vpbroadcastd sha256_4h+0(%rip), %ymm0
vpbroadcastd sha256_4h+16(%rip), %ymm1
vpbroadcastd sha256_4h+32(%rip), %ymm2
vpbroadcastd sha256_4h+48(%rip), %ymm3
vmovdqu %ymm0, 0*32(%rdi)
vmovdqu %ymm1, 1*32(%rdi)
vmovdqu %ymm2, 2*32(%rdi)
vmovdqu %ymm3, 3*32(%rdi)
/* rows 4-7 */
vpbroadcastd sha256_4h+64(%rip), %ymm0
vpbroadcastd sha256_4h+80(%rip), %ymm1
vpbroadcastd sha256_4h+96(%rip), %ymm2
vpbroadcastd sha256_4h+112(%rip), %ymm3
vmovdqu %ymm0, 4*32(%rdi)
vmovdqu %ymm1, 5*32(%rdi)
vmovdqu %ymm2, 6*32(%rdi)
vmovdqu %ymm3, 7*32(%rdi)
/* leave the upper YMM halves clean for SSE code in the caller */
vzeroupper
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rdi
#endif
ret
#endif /* USE_AVX2 */
/*
 * sha256_sse2_extend_round i
 *
 * Computes one message-schedule word for four lanes at once:
 *   W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16]
 * where W[j] is the 16-byte row at offset j*16 from rax. W[i-2] is taken
 * from xmm3 instead of memory, and the result is left in xmm3 as well as
 * stored to row i. Rotations are assembled from shift pairs.
 * Clobbers xmm0-xmm2.
 */
.macro sha256_sse2_extend_round i
movdqa (\i-15)*16(%rax), %xmm0
movdqa %xmm0, %xmm2
/* xmm0 = s0(W[i-15]) = (x >> 3) ^ ror(x, 7) ^ ror(x, 18) */
psrld $3, %xmm0
movdqa %xmm0, %xmm1
pslld $14, %xmm2
psrld $4, %xmm1
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
psrld $11, %xmm1
pslld $11, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
paddd (\i-16)*16(%rax), %xmm0
paddd (\i-7)*16(%rax), %xmm0
/* xmm3 = s1(W[i-2]) = (x >> 10) ^ ror(x, 17) ^ ror(x, 19) */
movdqa %xmm3, %xmm2
psrld $10, %xmm3
pslld $13, %xmm2
movdqa %xmm3, %xmm1
psrld $7, %xmm1
pxor %xmm1, %xmm3
pxor %xmm2, %xmm3
psrld $2, %xmm1
pslld $2, %xmm2
pxor %xmm1, %xmm3
pxor %xmm2, %xmm3
paddd %xmm0, %xmm3
movdqa %xmm3, \i*16(%rax)
.endm
/*
 * sha256_sse2_extend_doubleround i
 *
 * Computes two consecutive message-schedule words, W[i] and W[i+1], for
 * four lanes, with the two computations interleaved line by line.
 * W[j] is the 16-byte row at offset j*16 from rax.
 *   entry: xmm3 = W[i-2], xmm7 = W[i-1]
 *   exit:  xmm3 = W[i],   xmm7 = W[i+1] (both also stored)
 * xmm0-xmm2 serve W[i], xmm4-xmm6 serve W[i+1]; all six are clobbered.
 */
.macro sha256_sse2_extend_doubleround i
movdqa (\i-15)*16(%rax), %xmm0
movdqa (\i-14)*16(%rax), %xmm4
movdqa %xmm0, %xmm2
movdqa %xmm4, %xmm6
/* xmm0 / xmm4 = s0 of W[i-15] / W[i-14] */
psrld $3, %xmm0
psrld $3, %xmm4
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm5
pslld $14, %xmm2
pslld $14, %xmm6
psrld $4, %xmm1
psrld $4, %xmm5
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
psrld $11, %xmm1
psrld $11, %xmm5
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
pslld $11, %xmm2
pslld $11, %xmm6
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
paddd (\i-16)*16(%rax), %xmm0
paddd (\i-15)*16(%rax), %xmm4
/* xmm3 / xmm7 = s1 of W[i-2] / W[i-1] */
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
paddd (\i-7)*16(%rax), %xmm0
paddd (\i-6)*16(%rax), %xmm4
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, \i*16(%rax)
movdqa %xmm7, (\i+1)*16(%rax)
.endm
/*
 * sha256_sse2_main_round i
 *
 * One SHA-256 compression round on four lanes, using xmm0-xmm7 plus three
 * 16-byte stack slots for the working variables:
 *   xmm7 = a, xmm5 = b, xmm4 = c, xmm3 = d, xmm0 = e,
 *   0(%rsp) = f, 16(%rsp) = g, 32(%rsp) = h
 * The same layout holds on exit for the next round. Row i of the tables at
 * rax and rcx is added into T1 (presumably the expanded message words and
 * the round constants - the callers are outside this part of the file).
 * xmm1, xmm2 and xmm6 are scratch.
 */
.macro sha256_sse2_main_round i
movdqa 16*(\i)(%rax), %xmm6
movdqa %xmm0, %xmm1
movdqa 16(%rsp), %xmm2
/* xmm1 = ~e & g */
pandn %xmm2, %xmm1
paddd 32(%rsp), %xmm6
/* rotate the stack part of the state: h = g, g = f, f = e */
movdqa %xmm2, 32(%rsp)
movdqa 0(%rsp), %xmm2
movdqa %xmm2, 16(%rsp)
pand %xmm0, %xmm2
pxor %xmm2, %xmm1
movdqa %xmm0, 0(%rsp)
paddd %xmm1, %xmm6
/* xmm0 = ror(e,6) ^ ror(e,11) ^ ror(e,25), built from shift pairs */
movdqa %xmm0, %xmm1
psrld $6, %xmm0
paddd 16*(\i)(%rcx), %xmm6
movdqa %xmm0, %xmm2
pslld $7, %xmm1
psrld $5, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $14, %xmm1
psrld $14, %xmm2
pxor %xmm1, %xmm0
pslld $5, %xmm1
pxor %xmm2, %xmm0
pxor %xmm1, %xmm0
movdqa %xmm5, %xmm1
/* xmm6 = T1 */
paddd %xmm0, %xmm6
/* new e = d + T1; then d = c, c = b, b = a while Maj is formed */
movdqa %xmm3, %xmm0
movdqa %xmm4, %xmm3
movdqa %xmm4, %xmm2
paddd %xmm6, %xmm0
pand %xmm5, %xmm2
pand %xmm7, %xmm1
pand %xmm7, %xmm4
pxor %xmm4, %xmm1
movdqa %xmm5, %xmm4
movdqa %xmm7, %xmm5
/* xmm1 = (a & b) ^ (a & c) ^ (b & c) */
pxor %xmm2, %xmm1
paddd %xmm1, %xmm6
/* xmm7 = ror(a,2) ^ ror(a,13) ^ ror(a,22), built from shift pairs */
movdqa %xmm7, %xmm2
psrld $2, %xmm7
movdqa %xmm7, %xmm1
pslld $10, %xmm2
psrld $11, %xmm1
pxor %xmm2, %xmm7
pslld $9, %xmm2
pxor %xmm1, %xmm7
psrld $9, %xmm1
pxor %xmm2, %xmm7
pslld $11, %xmm2
pxor %xmm1, %xmm7
pxor %xmm2, %xmm7
/* new a = T1 + Maj + Sigma0(a) */
paddd %xmm6, %xmm7
.endm
/*
 * sha256_sse2_main_quadround i
 * Runs sha256_sse2_main_round for rounds i, i+1, i+2 and i+3. No register
 * renaming is needed because each round leaves the state in the same
 * registers and stack slots it found it in.
 */
.macro sha256_sse2_main_quadround i
sha256_sse2_main_round \i+0
sha256_sse2_main_round \i+1
sha256_sse2_main_round \i+2
sha256_sse2_main_round \i+3
.endm
#if defined(USE_AVX)
/*
 * sha256_avx_extend_round i
 *
 * AVX (three-operand) form of the four-lane message-schedule step:
 *   W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16]
 * W[j] is the 16-byte row at offset j*16 from rax. W[i-2] is taken from
 * xmm3; the result is left in xmm3 and stored to row i.
 * Clobbers xmm0-xmm2.
 */
.macro sha256_avx_extend_round i
vmovdqa (\i-15)*16(%rax), %xmm0
/* xmm0 = s0(W[i-15]) = (x >> 3) ^ ror(x, 7) ^ ror(x, 18) */
vpslld $14, %xmm0, %xmm2
vpsrld $3, %xmm0, %xmm0
vpsrld $4, %xmm0, %xmm1
vpxor %xmm1, %xmm0, %xmm0
vpxor %xmm2, %xmm0, %xmm0
vpsrld $11, %xmm1, %xmm1
vpslld $11, %xmm2, %xmm2
vpxor %xmm1, %xmm0, %xmm0
vpxor %xmm2, %xmm0, %xmm0
vpaddd (\i-16)*16(%rax), %xmm0, %xmm0
vpaddd (\i-7)*16(%rax), %xmm0, %xmm0
/* xmm3 = s1(W[i-2]) = (x >> 10) ^ ror(x, 17) ^ ror(x, 19) */
vpslld $13, %xmm3, %xmm2
vpsrld $10, %xmm3, %xmm3
vpsrld $7, %xmm3, %xmm1
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm2, %xmm3, %xmm3
vpsrld $2, %xmm1, %xmm1
vpslld $2, %xmm2, %xmm2
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm2, %xmm3, %xmm3
vpaddd %xmm0, %xmm3, %xmm3
vmovdqa %xmm3, \i*16(%rax)
.endm
/*
 * sha256_avx_extend_doubleround i
 *
 * Computes W[i] and W[i+1] for four lanes, interleaved line by line.
 * W[j] is the 16-byte row at offset j*16 from rax.
 *   entry: xmm3 = W[i-2], xmm7 = W[i-1]
 *   exit:  xmm3 = W[i],   xmm7 = W[i+1] (both also stored)
 * s0(W[i-15]) is accumulated in xmm8 so that the unmodified W[i-15] stays
 * in xmm0 and can be added into the W[i+1] sum without a second load.
 * Clobbers xmm0-xmm2, xmm4-xmm6 and xmm8.
 */
.macro sha256_avx_extend_doubleround i
vmovdqa (\i-15)*16(%rax), %xmm0
vmovdqa (\i-14)*16(%rax), %xmm4
vpslld $14, %xmm0, %xmm2
vpslld $14, %xmm4, %xmm6
vpsrld $3, %xmm0, %xmm8
vpsrld $3, %xmm4, %xmm4
/* both lines produce x >> 7: xmm0 is still unshifted, xmm4 is already >> 3 */
vpsrld $7, %xmm0, %xmm1
vpsrld $4, %xmm4, %xmm5
vpxor %xmm1, %xmm8, %xmm8
vpxor %xmm5, %xmm4, %xmm4
vpsrld $11, %xmm1, %xmm1
vpsrld $11, %xmm5, %xmm5
vpxor %xmm2, %xmm8, %xmm8
vpxor %xmm6, %xmm4, %xmm4
vpslld $11, %xmm2, %xmm2
vpslld $11, %xmm6, %xmm6
vpxor %xmm1, %xmm8, %xmm8
vpxor %xmm5, %xmm4, %xmm4
vpxor %xmm2, %xmm8, %xmm8
vpxor %xmm6, %xmm4, %xmm4
/* xmm4 = s0(W[i-14]) + W[i-15]; xmm0 = s0(W[i-15]) + W[i-16] */
vpaddd %xmm0, %xmm4, %xmm4
vpaddd (\i-16)*16(%rax), %xmm8, %xmm0
/* xmm3 / xmm7 = s1 of W[i-2] / W[i-1] */
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpaddd (\i-7)*16(%rax), %xmm0, %xmm0
vpaddd (\i-6)*16(%rax), %xmm4, %xmm4
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, \i*16(%rax)
vmovdqa %xmm7, (\i+1)*16(%rax)
.endm
/*
 * sha256_avx_main_round i, r0, r1, r2, r3, r4, r5, r6, r7
 *
 * One SHA-256 compression round on four lanes. Register roles on entry:
 *   r0 = h, r1 = g, r2 = f, r3 = e, r4 = d, r5 = c, r6 = b, r7 = a
 * On exit r0 = d + T1 (the new e) and r4 = T1 + T2 (the new a); the other
 * arguments are unchanged, so the caller renames registers between rounds.
 * Row i of the tables at rax and rcx is added into T1 (presumably the
 * expanded message words and the round constants - the callers are outside
 * this part of the file). Clobbers xmm1, xmm2 and xmm6.
 */
.macro sha256_avx_main_round i, r0, r1, r2, r3, r4, r5, r6, r7
vpaddd 16*(\i)(%rax), \r0, %xmm6
vpaddd 16*(\i)(%rcx), %xmm6, %xmm6
/* xmm1 = (~e & g) ^ (e & f) */
vpandn \r1, \r3, %xmm1
vpand \r3, \r2, %xmm2
vpxor %xmm2, %xmm1, %xmm1
vpaddd %xmm1, %xmm6, %xmm6
/* r0 = ror(e,6) ^ ror(e,11) ^ ror(e,25), built from shift pairs */
vpslld $7, \r3, %xmm1
vpsrld $6, \r3, \r0
vpsrld $5, \r0, %xmm2
vpxor %xmm1, \r0, \r0
vpxor %xmm2, \r0, \r0
vpslld $14, %xmm1, %xmm1
vpsrld $14, %xmm2, %xmm2
vpxor %xmm1, \r0, \r0
vpxor %xmm2, \r0, \r0
vpslld $5, %xmm1, %xmm1
vpxor %xmm1, \r0, \r0
/* xmm6 = T1; r0 = d + T1 */
vpaddd \r0, %xmm6, %xmm6
vpaddd %xmm6, \r4, \r0
/* xmm1 = (b & c) ^ (a & c) ^ (a & b) */
vpand \r6, \r5, %xmm2
vpand \r7, \r5, \r4
vpand \r7, \r6, %xmm1
vpxor \r4, %xmm1, %xmm1
vpxor %xmm2, %xmm1, %xmm1
vpaddd %xmm1, %xmm6, %xmm6
/* r4 = ror(a,2) ^ ror(a,13) ^ ror(a,22), built from shift pairs */
vpslld $10, \r7, %xmm2
vpsrld $2, \r7, \r4
vpsrld $11, \r4, %xmm1
vpxor %xmm2, \r4, \r4
vpxor %xmm1, \r4, \r4
vpslld $9, %xmm2, %xmm2
vpsrld $9, %xmm1, %xmm1
vpxor %xmm2, \r4, \r4
vpxor %xmm1, \r4, \r4
vpslld $11, %xmm2, %xmm2
vpxor %xmm2, \r4, \r4
vpaddd %xmm6, \r4, \r4
.endm
/*
 * sha256_avx_main_quadround i
 * Four consecutive rounds i..i+3. The state enters as
 *   xmm7 = a, xmm5 = b, xmm4 = c, xmm3 = d,
 *   xmm0 = e, xmm8 = f, xmm9 = g, xmm10 = h
 * and the register arguments are permuted from round to round so that
 * after the fourth round every variable is back in its starting register.
 */
.macro sha256_avx_main_quadround i
sha256_avx_main_round \i+0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7
sha256_avx_main_round \i+1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3
sha256_avx_main_round \i+2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4
sha256_avx_main_round \i+3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5
.endm
#endif /* USE_AVX */
#if defined(USE_AVX2)
/*
 * sha256_avx2_extend_round i
 *
 * Eight-lane (YMM) form of the message-schedule step:
 *   W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16]
 * W[j] is the 32-byte row at offset j*32 from rax. W[i-2] is taken from
 * ymm3; the result is left in ymm3 and stored to row i.
 * Clobbers ymm0-ymm2.
 */
.macro sha256_avx2_extend_round i
vmovdqa (\i-15)*32(%rax), %ymm0
/* ymm0 = s0(W[i-15]) = (x >> 3) ^ ror(x, 7) ^ ror(x, 18) */
vpslld $14, %ymm0, %ymm2
vpsrld $3, %ymm0, %ymm0
vpsrld $4, %ymm0, %ymm1
vpxor %ymm1, %ymm0, %ymm0
vpxor %ymm2, %ymm0, %ymm0
vpsrld $11, %ymm1, %ymm1
vpslld $11, %ymm2, %ymm2
vpxor %ymm1, %ymm0, %ymm0
vpxor %ymm2, %ymm0, %ymm0
vpaddd (\i-16)*32(%rax), %ymm0, %ymm0
vpaddd (\i-7)*32(%rax), %ymm0, %ymm0
/* ymm3 = s1(W[i-2]) = (x >> 10) ^ ror(x, 17) ^ ror(x, 19) */
vpslld $13, %ymm3, %ymm2
vpsrld $10, %ymm3, %ymm3
vpsrld $7, %ymm3, %ymm1
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm2, %ymm3, %ymm3
vpsrld $2, %ymm1, %ymm1
vpslld $2, %ymm2, %ymm2
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm2, %ymm3, %ymm3
vpaddd %ymm0, %ymm3, %ymm3
vmovdqa %ymm3, \i*32(%rax)
.endm
/*
 * sha256_avx2_extend_doubleround i
 *
 * Computes W[i] and W[i+1] for eight lanes, interleaved line by line.
 * W[j] is the 32-byte row at offset j*32 from rax.
 *   entry: ymm3 = W[i-2], ymm7 = W[i-1]
 *   exit:  ymm3 = W[i],   ymm7 = W[i+1] (both also stored)
 * s0(W[i-15]) is accumulated in ymm8 so that the unmodified W[i-15] stays
 * in ymm0 and can be added into the W[i+1] sum without a second load.
 * Clobbers ymm0-ymm2, ymm4-ymm6 and ymm8.
 */
.macro sha256_avx2_extend_doubleround i
vmovdqa (\i-15)*32(%rax), %ymm0
vmovdqa (\i-14)*32(%rax), %ymm4
vpslld $14, %ymm0, %ymm2
vpslld $14, %ymm4, %ymm6
vpsrld $3, %ymm0, %ymm8
vpsrld $3, %ymm4, %ymm4
/* both lines produce x >> 7: ymm0 is still unshifted, ymm4 is already >> 3 */
vpsrld $7, %ymm0, %ymm1
vpsrld $4, %ymm4, %ymm5
vpxor %ymm1, %ymm8, %ymm8
vpxor %ymm5, %ymm4, %ymm4
vpsrld $11, %ymm1, %ymm1
vpsrld $11, %ymm5, %ymm5
vpxor %ymm2, %ymm8, %ymm8
vpxor %ymm6, %ymm4, %ymm4
vpslld $11, %ymm2, %ymm2
vpslld $11, %ymm6, %ymm6
vpxor %ymm1, %ymm8, %ymm8
vpxor %ymm5, %ymm4, %ymm4
vpxor %ymm2, %ymm8, %ymm8
vpxor %ymm6, %ymm4, %ymm4
/* ymm4 = s0(W[i-14]) + W[i-15]; ymm0 = s0(W[i-15]) + W[i-16] */
vpaddd %ymm0, %ymm4, %ymm4
vpaddd (\i-16)*32(%rax), %ymm8, %ymm0
/* ymm3 / ymm7 = s1 of W[i-2] / W[i-1] */
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpaddd (\i-7)*32(%rax), %ymm0, %ymm0
vpaddd (\i-6)*32(%rax), %ymm4, %ymm4
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd %ymm0, %ymm3, %ymm3
vpaddd %ymm4, %ymm7, %ymm7
vmovdqa %ymm3, \i*32(%rax)
vmovdqa %ymm7, (\i+1)*32(%rax)
.endm
/*
 * sha256_avx2_main_round i, r0, r1, r2, r3, r4, r5, r6, r7
 *
 * One SHA-256 compression round on eight lanes. Register roles on entry:
 *   r0 = h, r1 = g, r2 = f, r3 = e, r4 = d, r5 = c, r6 = b, r7 = a
 * On exit r0 = d + T1 (the new e) and r4 = T1 + T2 (the new a); the other
 * arguments are unchanged, so the caller renames registers between rounds.
 * Row i (32 bytes) of the tables at rax and rcx is added into T1
 * (presumably the expanded message words and the round constants - the
 * callers are outside this part of the file).
 * Clobbers ymm1, ymm2 and ymm6.
 */
.macro sha256_avx2_main_round i, r0, r1, r2, r3, r4, r5, r6, r7
vpaddd 32*(\i)(%rax), \r0, %ymm6
vpaddd 32*(\i)(%rcx), %ymm6, %ymm6
/* ymm1 = (~e & g) ^ (e & f) */
vpandn \r1, \r3, %ymm1
vpand \r3, \r2, %ymm2
vpxor %ymm2, %ymm1, %ymm1
vpaddd %ymm1, %ymm6, %ymm6
/* r0 = ror(e,6) ^ ror(e,11) ^ ror(e,25), built from shift pairs */
vpslld $7, \r3, %ymm1
vpsrld $6, \r3, \r0
vpsrld $5, \r0, %ymm2
vpxor %ymm1, \r0, \r0
vpxor %ymm2, \r0, \r0
vpslld $14, %ymm1, %ymm1
vpsrld $14, %ymm2, %ymm2
vpxor %ymm1, \r0, \r0
vpxor %ymm2, \r0, \r0
vpslld $5, %ymm1, %ymm1
vpxor %ymm1, \r0, \r0
/* ymm6 = T1; r0 = d + T1 */
vpaddd \r0, %ymm6, %ymm6
vpaddd %ymm6, \r4, \r0
/* ymm1 = (b & c) ^ (a & c) ^ (a & b) */
vpand \r6, \r5, %ymm2
vpand \r7, \r5, \r4
vpand \r7, \r6, %ymm1
vpxor \r4, %ymm1, %ymm1
vpxor %ymm2, %ymm1, %ymm1
vpaddd %ymm1, %ymm6, %ymm6
/* r4 = ror(a,2) ^ ror(a,13) ^ ror(a,22), built from shift pairs */
vpslld $10, \r7, %ymm2
vpsrld $2, \r7, \r4
vpsrld $11, \r4, %ymm1
vpxor %ymm2, \r4, \r4
vpxor %ymm1, \r4, \r4
vpslld $9, %ymm2, %ymm2
vpsrld $9, %ymm1, %ymm1
vpxor %ymm2, \r4, \r4
vpxor %ymm1, \r4, \r4
vpslld $11, %ymm2, %ymm2
vpxor %ymm2, \r4, \r4
vpaddd %ymm6, \r4, \r4
.endm
/*
 * sha256_avx2_main_quadround i
 * Four consecutive rounds i..i+3. The state enters as
 *   ymm7 = a, ymm5 = b, ymm4 = c, ymm3 = d,
 *   ymm0 = e, ymm8 = f, ymm9 = g, ymm10 = h
 * and the register arguments are permuted from round to round so that
 * after the fourth round every variable is back in its starting register.
 */
.macro sha256_avx2_main_quadround i
sha256_avx2_main_round \i+0, %ymm10, %ymm9, %ymm8, %ymm0, %ymm3, %ymm4, %ymm5, %ymm7
sha256_avx2_main_round \i+1, %ymm9, %ymm8, %ymm0, %ymm10, %ymm4, %ymm5, %ymm7, %ymm3
sha256_avx2_main_round \i+2, %ymm8, %ymm0, %ymm10, %ymm9, %ymm5, %ymm7, %ymm3, %ymm4
sha256_avx2_main_round \i+3, %ymm0, %ymm10, %ymm9, %ymm8, %ymm7, %ymm3, %ymm4, %ymm5
.endm
#endif /* USE_AVX2 */
#if defined(USE_XOP)
/*
 * sha256_xop_extend_round i
 *
 * XOP form of the four-lane message-schedule step:
 *   W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16]
 * W[j] is the 16-byte row at offset j*16 from rax. W[i-2] is taken from
 * xmm3; the result is left in xmm3 and stored to row i.
 * vprotd rotates left, so a right rotation by n is written as 32 - n.
 * Clobbers xmm0-xmm2.
 */
.macro sha256_xop_extend_round i
vmovdqa (\i-15)*16(%rax), %xmm0
/* s0: ror 7 = rol 25, ror 18 = rol 14, plus a plain >> 3 */
vprotd $25, %xmm0, %xmm1
vprotd $14, %xmm0, %xmm2
vpsrld $3, %xmm0, %xmm0
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm2, %xmm0, %xmm0
vpaddd (\i-16)*16(%rax), %xmm0, %xmm0
vpaddd (\i-7)*16(%rax), %xmm0, %xmm0
/* s1: ror 17 = rol 15, ror 19 = rol 13, plus a plain >> 10 */
vprotd $15, %xmm3, %xmm1
vprotd $13, %xmm3, %xmm2
vpsrld $10, %xmm3, %xmm3
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm2, %xmm3, %xmm3
vpaddd %xmm0, %xmm3, %xmm3
vmovdqa %xmm3, \i*16(%rax)
.endm
/*
 * sha256_xop_extend_doubleround i
 *
 * Computes W[i] and W[i+1] for four lanes using XOP rotates, with the two
 * computations interleaved line by line. W[j] is the 16-byte row at
 * offset j*16 from rax.
 *   entry: xmm3 = W[i-2], xmm7 = W[i-1]
 *   exit:  xmm3 = W[i],   xmm7 = W[i+1] (both also stored)
 * vprotd rotates left, so a right rotation by n is written as 32 - n.
 * Clobbers xmm0-xmm2 and xmm4-xmm6.
 */
.macro sha256_xop_extend_doubleround i
vmovdqa (\i-15)*16(%rax), %xmm0
vmovdqa (\i-14)*16(%rax), %xmm4
/* xmm0 / xmm4 = s0 of W[i-15] / W[i-14] */
vprotd $25, %xmm0, %xmm1
vprotd $25, %xmm4, %xmm5
vprotd $14, %xmm0, %xmm2
vprotd $14, %xmm4, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $3, %xmm0, %xmm0
vpsrld $3, %xmm4, %xmm4
vpxor %xmm2, %xmm0, %xmm0
vpxor %xmm6, %xmm4, %xmm4
vpaddd (\i-16)*16(%rax), %xmm0, %xmm0
vpaddd (\i-15)*16(%rax), %xmm4, %xmm4
/* xmm3 / xmm7 = s1 of W[i-2] / W[i-1] */
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpaddd (\i-7)*16(%rax), %xmm0, %xmm0
vpaddd (\i-6)*16(%rax), %xmm4, %xmm4
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, \i*16(%rax)
vmovdqa %xmm7, (\i+1)*16(%rax)
.endm
/*
 * sha256_xop_main_round i, r0, r1, r2, r3, r4, r5, r6, r7
 *
 * One SHA-256 compression round on four lanes using XOP rotates.
 * Register roles on entry:
 *   r0 = h, r1 = g, r2 = f, r3 = e, r4 = d, r5 = c, r6 = b, r7 = a
 * On exit r0 = d + T1 (the new e) and r4 = T1 + T2 (the new a); the other
 * arguments are unchanged, so the caller renames registers between rounds.
 * Row i of the tables at rax and rcx is added into T1 (presumably the
 * expanded message words and the round constants - the callers are outside
 * this part of the file). vprotd rotates left, so a right rotation by n is
 * written as 32 - n. Clobbers xmm1, xmm2 and xmm6.
 */
.macro sha256_xop_main_round i, r0, r1, r2, r3, r4, r5, r6, r7
vpaddd 16*(\i)(%rax), \r0, %xmm6
vpaddd 16*(\i)(%rcx), %xmm6, %xmm6
/* xmm1 = (~e & g) ^ (e & f) */
vpandn \r1, \r3, %xmm1
vpand \r3, \r2, %xmm2
vpxor %xmm2, %xmm1, %xmm1
vpaddd %xmm1, %xmm6, %xmm6
/* r0 = ror(e,6) ^ ror(e,11) ^ ror(e,25) */
vprotd $26, \r3, %xmm1
vprotd $21, \r3, %xmm2
vpxor %xmm1, %xmm2, %xmm2
vprotd $7, \r3, \r0
vpxor %xmm2, \r0, \r0
/* xmm6 = T1; r0 = d + T1 */
vpaddd \r0, %xmm6, %xmm6
vpaddd %xmm6, \r4, \r0
/* xmm1 = (b & c) ^ (a & c) ^ (a & b) */
vpand \r6, \r5, %xmm2
vpand \r7, \r5, \r4
vpand \r7, \r6, %xmm1
vpxor \r4, %xmm1, %xmm1
vpxor %xmm2, %xmm1, %xmm1
vpaddd %xmm1, %xmm6, %xmm6
/* r4 = ror(a,2) ^ ror(a,13) ^ ror(a,22) */
vprotd $30, \r7, %xmm1
vprotd $19, \r7, %xmm2
vpxor %xmm1, %xmm2, %xmm2
vprotd $10, \r7, \r4
vpxor %xmm2, \r4, \r4
vpaddd %xmm6, \r4, \r4
.endm
/*
 * sha256_xop_main_quadround i
 * Four consecutive rounds i..i+3. The state enters as
 *   xmm7 = a, xmm5 = b, xmm4 = c, xmm3 = d,
 *   xmm0 = e, xmm8 = f, xmm9 = g, xmm10 = h
 * and the register arguments are permuted from round to round so that
 * after the fourth round every variable is back in its starting register.
 */
.macro sha256_xop_main_quadround i
sha256_xop_main_round \i+0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7
sha256_xop_main_round \i+1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3
sha256_xop_main_round \i+2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4
sha256_xop_main_round \i+3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5
.endm
#endif /* USE_XOP */
.text
.p2align 6
/*
 * sha256_transform_4way_core_sse2
 *
 * SSE2 core of the 4-way transform.  Not a function on its own: it is
 * entered by a jump and leaves by jumping to sha256_transform_4way_finish.
 *
 * In:   %rsp -> 64 message-schedule slots of 16 bytes each (16-byte
 *       aligned), of which slots 0-15 are already filled in;
 *       %rdi -> eight 16-byte state slots.
 * Out:  working variables after 64 rounds in
 *         %xmm7, %xmm5, %xmm4, %xmm3, %xmm0, %xmm8, %xmm9, %xmm10
 *       (same order as the state slots at 0, 16, ..., 112(%rdi)).
 * Clobbers %rax, %rcx, %xmm1, %xmm2, %xmm6 and slots 16-63 on the stack.
 */
sha256_transform_4way_core_sse2:
/* %rcx walks from slot 16 upwards, %rax marks the end (slot 64) */
leaq 256(%rsp), %rcx
leaq 48*16(%rcx), %rax
/* slots 14 and 15 are carried in %xmm3 / %xmm7 across iterations */
movdqa -2*16(%rcx), %xmm3
movdqa -1*16(%rcx), %xmm7
/* Each iteration produces two new schedule slots, (%rcx) and 16(%rcx). */
sha256_transform_4way_sse2_extend_loop:
/*
 * s0 of slots -15 / -14: (x >> 3) ^ rotr(x, 7) ^ rotr(x, 18).  SSE2 has no
 * rotate, so each rotate is assembled from a right and a left shift.
 */
movdqa -15*16(%rcx), %xmm0
movdqa -14*16(%rcx), %xmm4
movdqa %xmm0, %xmm2
movdqa %xmm4, %xmm6
psrld $3, %xmm0
psrld $3, %xmm4
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm5
pslld $14, %xmm2
pslld $14, %xmm6
psrld $4, %xmm1
psrld $4, %xmm5
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
psrld $11, %xmm1
psrld $11, %xmm5
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
pslld $11, %xmm2
pslld $11, %xmm6
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
/* add slots -16 and -15 */
paddd -16*16(%rcx), %xmm0
paddd -15*16(%rcx), %xmm4
/* s1 of the carried slots -2 / -1: (x >> 10) ^ rotr(x, 17) ^ rotr(x, 19) */
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
/* add slots -7 and -6 (interleaved with the s1 computation) */
paddd -7*16(%rcx), %xmm0
paddd -6*16(%rcx), %xmm4
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
/* store the two new slots; they stay in %xmm3 / %xmm7 for the next pass */
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, (%rcx)
movdqa %xmm7, 16(%rcx)
addq $2*16, %rcx
cmpq %rcx, %rax
jne sha256_transform_4way_sse2_extend_loop
/* load the eight state slots into the working registers */
movdqu 0(%rdi), %xmm7
movdqu 16(%rdi), %xmm5
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm3
movdqu 64(%rdi), %xmm0
movdqu 80(%rdi), %xmm8
movdqu 96(%rdi), %xmm9
movdqu 112(%rdi), %xmm10
/* %rcx -> round constants, %rax = byte offset of the current round */
leaq sha256_4k(%rip), %rcx
xorq %rax, %rax
/*
 * One round per iteration.  Roles at the top of the loop:
 *   a = %xmm7, b = %xmm5, c = %xmm4, d = %xmm3,
 *   e = %xmm0, f = %xmm8, g = %xmm9, h = %xmm10
 * The shifted roles for the next round are set up with register moves
 * interleaved into the computation.
 */
sha256_transform_4way_sse2_main_loop:
/* %xmm6 = W + K + h */
movdqa (%rsp, %rax), %xmm6
paddd (%rcx, %rax), %xmm6
paddd %xmm10, %xmm6
/* Ch(e, f, g) = (~e & g) ^ (e & f); also h <- g, g <- f, f <- e */
movdqa %xmm0, %xmm1
movdqa %xmm9, %xmm2
pandn %xmm2, %xmm1
movdqa %xmm2, %xmm10
movdqa %xmm8, %xmm2
movdqa %xmm2, %xmm9
pand %xmm0, %xmm2
pxor %xmm2, %xmm1
movdqa %xmm0, %xmm8
paddd %xmm1, %xmm6
/* S1(e) = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25), built from shifts */
movdqa %xmm0, %xmm1
psrld $6, %xmm0
movdqa %xmm0, %xmm2
pslld $7, %xmm1
psrld $5, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $14, %xmm1
psrld $14, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $5, %xmm1
pxor %xmm1, %xmm0
paddd %xmm0, %xmm6
/* new e = d + T1 */
movdqa %xmm3, %xmm0
paddd %xmm6, %xmm0
/* Maj(a, b, c); also d <- c, c <- b, b <- a */
movdqa %xmm5, %xmm1
movdqa %xmm4, %xmm3
movdqa %xmm4, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm4
pand %xmm7, %xmm1
pxor %xmm4, %xmm1
movdqa %xmm5, %xmm4
movdqa %xmm7, %xmm5
pxor %xmm2, %xmm1
paddd %xmm1, %xmm6
/* S0(a) = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22), built from shifts */
movdqa %xmm7, %xmm2
psrld $2, %xmm7
movdqa %xmm7, %xmm1
pslld $10, %xmm2
psrld $11, %xmm1
pxor %xmm2, %xmm7
pxor %xmm1, %xmm7
pslld $9, %xmm2
psrld $9, %xmm1
pxor %xmm2, %xmm7
pxor %xmm1, %xmm7
pslld $11, %xmm2
pxor %xmm2, %xmm7
/* new a = T1 + Maj + S0 */
paddd %xmm6, %xmm7
/* next round; stop after 64 slots of 16 bytes */
addq $16, %rax
cmpq $16*64, %rax
jne sha256_transform_4way_sse2_main_loop
jmp sha256_transform_4way_finish
#if defined(USE_AVX)
.text
.p2align 6
/*
 * sha256_transform_4way_core_avx
 *
 * AVX core of the 4-way transform.  Entered by a jump with %rsp pointing
 * at the 16-byte message-schedule slots (slots 0-15 filled in) and %rdi at
 * the eight state slots; leaves by jumping to sha256_transform_4way_finish
 * with the working variables in
 *   %xmm7, %xmm5, %xmm4, %xmm3, %xmm0, %xmm8, %xmm9, %xmm10.
 *
 * The unrolled macro sequences are generated with .irp; the resulting
 * instruction stream is the same as writing each invocation out by hand.
 */
sha256_transform_4way_core_avx:
/* %rax -> slot 16; slots 14 and 15 are carried in %xmm3 / %xmm7 */
leaq 256(%rsp), %rax
movdqa -2*16(%rax), %xmm3
movdqa -1*16(%rax), %xmm7
/* fill slots 16-63, two per macro invocation */
.irp w, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46
sha256_avx_extend_doubleround \w
.endr
/* load the state into the working registers */
movdqu 0(%rdi), %xmm7
movdqu 16(%rdi), %xmm5
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm3
movdqu 64(%rdi), %xmm0
movdqu 80(%rdi), %xmm8
movdqu 96(%rdi), %xmm9
movdqu 112(%rdi), %xmm10
/* %rax -> slot 0 of the schedule, %rcx -> round constants */
movq %rsp, %rax
leaq sha256_4k(%rip), %rcx
/* 64 rounds, four per macro invocation */
.irp r, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
sha256_avx_main_quadround \r
.endr
jmp sha256_transform_4way_finish
#endif /* USE_AVX */
#if defined(USE_XOP)
.text
.p2align 6
/*
 * sha256_transform_4way_core_xop
 *
 * XOP core of the 4-way transform.  Entered by a jump with %rsp pointing
 * at the 16-byte message-schedule slots (slots 0-15 filled in) and %rdi at
 * the eight state slots; leaves by jumping to sha256_transform_4way_finish
 * with the working variables in
 *   %xmm7, %xmm5, %xmm4, %xmm3, %xmm0, %xmm8, %xmm9, %xmm10.
 *
 * The unrolled macro sequences are generated with .irp; the resulting
 * instruction stream is the same as writing each invocation out by hand.
 */
sha256_transform_4way_core_xop:
/* %rax -> slot 16; slots 14 and 15 are carried in %xmm3 / %xmm7 */
leaq 256(%rsp), %rax
movdqa -2*16(%rax), %xmm3
movdqa -1*16(%rax), %xmm7
/* fill slots 16-63, two per macro invocation */
.irp w, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46
sha256_xop_extend_doubleround \w
.endr
/* load the state into the working registers */
movdqu 0(%rdi), %xmm7
movdqu 16(%rdi), %xmm5
movdqu 32(%rdi), %xmm4
movdqu 48(%rdi), %xmm3
movdqu 64(%rdi), %xmm0
movdqu 80(%rdi), %xmm8
movdqu 96(%rdi), %xmm9
movdqu 112(%rdi), %xmm10
/* %rax -> slot 0 of the schedule, %rcx -> round constants */
movq %rsp, %rax
leaq sha256_4k(%rip), %rcx
/* 64 rounds, four per macro invocation */
.irp r, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
sha256_xop_main_quadround \r
.endr
jmp sha256_transform_4way_finish
#endif /* USE_XOP */
.data
.p2align 3
/*
 * 8-byte pointer that sha256_transform_4way jumps through to reach the
 * selected core routine.  It is zero here; the code that stores a real
 * address into it is not part of this section of the file and must run
 * before the first call.
 */
sha256_transform_4way_core_addr:
.quad 0x0
/*
 * p2bswap_rsi_rsp i
 *
 * Copies the 16-byte slots i and i+1 from (%rsi) (unaligned loads) to
 * (%rsp) (aligned stores), reversing the byte order of every 32-bit word
 * on the way.  Clobbers %xmm0-%xmm3.
 */
.macro p2bswap_rsi_rsp i
movdqu \i*16(%rsi), %xmm0
movdqu (\i+1)*16(%rsi), %xmm2
/* swap the two 16-bit halves of every 32-bit word */
pshuflw $0xb1, %xmm0, %xmm0
pshuflw $0xb1, %xmm2, %xmm2
pshufhw $0xb1, %xmm0, %xmm0
pshufhw $0xb1, %xmm2, %xmm2
/* swap the two bytes of every 16-bit half: (x >> 8) ^ (x << 8) */
movdqa %xmm0, %xmm1
movdqa %xmm2, %xmm3
psrlw $8, %xmm1
psrlw $8, %xmm3
psllw $8, %xmm0
psllw $8, %xmm2
pxor %xmm1, %xmm0
pxor %xmm3, %xmm2
movdqa %xmm0, \i*16(%rsp)
movdqa %xmm2, (\i+1)*16(%rsp)
.endm
.text
.p2align 6
.globl sha256_transform_4way
.globl _sha256_transform_4way
/*
 * sha256_transform_4way  (also exported with a leading underscore)
 *
 * Register arguments as used below (on _WIN64 / __CYGWIN__ they arrive in
 * %rcx, %rdx, %r8 and are moved into place by the prologue):
 *   %rdi  state: eight 16-byte slots, read and written with unaligned
 *         moves; updated in place.
 *   %rsi  input block: sixteen 16-byte slots, read with unaligned loads.
 *   %rdx  if non-zero, every 32-bit input word is byte-swapped while it
 *         is copied.
 *
 * The input is copied to slots 0-15 of a 128-byte-aligned scratch area on
 * the stack, then control jumps through sha256_transform_4way_core_addr.
 * The core routines come back at sha256_transform_4way_finish, which adds
 * the working variables to the state and returns to the caller.
 *
 * %r8 holds the caller's stack pointer for the whole call.
 */
sha256_transform_4way:
_sha256_transform_4way:
#if defined(_WIN64) || defined(__CYGWIN__)
/* save %rdi, %rsi and %xmm6-%xmm11, then move the arguments into place */
pushq %rdi
subq $96, %rsp
movdqa %xmm6, 0(%rsp)
movdqa %xmm7, 16(%rsp)
movdqa %xmm8, 32(%rsp)
movdqa %xmm9, 48(%rsp)
movdqa %xmm10, 64(%rsp)
movdqa %xmm11, 80(%rsp)
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
#endif
/* remember the stack pointer, then carve out an aligned scratch area */
movq %rsp, %r8
subq $1032, %rsp
andq $-128, %rsp
testq %rdx, %rdx
jnz sha256_transform_4way_swap
/* plain copy of the 16 input slots, eight at a time */
movdqu 0*16(%rsi), %xmm0
movdqu 1*16(%rsi), %xmm1
movdqu 2*16(%rsi), %xmm2
movdqu 3*16(%rsi), %xmm3
movdqu 4*16(%rsi), %xmm4
movdqu 5*16(%rsi), %xmm5
movdqu 6*16(%rsi), %xmm6
movdqu 7*16(%rsi), %xmm7
movdqa %xmm0, 0*16(%rsp)
movdqa %xmm1, 1*16(%rsp)
movdqa %xmm2, 2*16(%rsp)
movdqa %xmm3, 3*16(%rsp)
movdqa %xmm4, 4*16(%rsp)
movdqa %xmm5, 5*16(%rsp)
movdqa %xmm6, 6*16(%rsp)
movdqa %xmm7, 7*16(%rsp)
movdqu 8*16(%rsi), %xmm0
movdqu 9*16(%rsi), %xmm1
movdqu 10*16(%rsi), %xmm2
movdqu 11*16(%rsi), %xmm3
movdqu 12*16(%rsi), %xmm4
movdqu 13*16(%rsi), %xmm5
movdqu 14*16(%rsi), %xmm6
movdqu 15*16(%rsi), %xmm7
movdqa %xmm0, 8*16(%rsp)
movdqa %xmm1, 9*16(%rsp)
movdqa %xmm2, 10*16(%rsp)
movdqa %xmm3, 11*16(%rsp)
movdqa %xmm4, 12*16(%rsp)
movdqa %xmm5, 13*16(%rsp)
movdqa %xmm6, 14*16(%rsp)
movdqa %xmm7, 15*16(%rsp)
jmp *sha256_transform_4way_core_addr(%rip)
.p2align 6
/* byte-swapping copy of the 16 input slots, two per macro invocation */
sha256_transform_4way_swap:
p2bswap_rsi_rsp 0
p2bswap_rsi_rsp 2
p2bswap_rsi_rsp 4
p2bswap_rsi_rsp 6
p2bswap_rsi_rsp 8
p2bswap_rsi_rsp 10
p2bswap_rsi_rsp 12
p2bswap_rsi_rsp 14
jmp *sha256_transform_4way_core_addr(%rip)
.p2align 6
/*
 * Common tail of all 4-way cores.  Expects the working variables in
 * %xmm7, %xmm5, %xmm4, %xmm3, %xmm0, %xmm8, %xmm9, %xmm10 (in state-slot
 * order) and %r8 = stack pointer saved at entry.
 */
sha256_transform_4way_finish:
/* add the previous state to the working variables, four slots at a time */
movdqu 0(%rdi), %xmm2
movdqu 16(%rdi), %xmm6
movdqu 32(%rdi), %xmm11
movdqu 48(%rdi), %xmm1
paddd %xmm2, %xmm7
paddd %xmm6, %xmm5
paddd %xmm11, %xmm4
paddd %xmm1, %xmm3
movdqu 64(%rdi), %xmm2
movdqu 80(%rdi), %xmm6
movdqu 96(%rdi), %xmm11
movdqu 112(%rdi), %xmm1
paddd %xmm2, %xmm0
paddd %xmm6, %xmm8
paddd %xmm11, %xmm9
paddd %xmm1, %xmm10
/* write the new state back */
movdqu %xmm7, 0(%rdi)
movdqu %xmm5, 16(%rdi)
movdqu %xmm4, 32(%rdi)
movdqu %xmm3, 48(%rdi)
movdqu %xmm0, 64(%rdi)
movdqu %xmm8, 80(%rdi)
movdqu %xmm9, 96(%rdi)
movdqu %xmm10, 112(%rdi)
/* drop the scratch area */
movq %r8, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
/* undo the prologue in reverse order */
popq %rsi
movdqa 0(%rsp), %xmm6
movdqa 16(%rsp), %xmm7
movdqa 32(%rsp), %xmm8
movdqa 48(%rsp), %xmm9
movdqa 64(%rsp), %xmm10
movdqa 80(%rsp), %xmm11
addq $96, %rsp
popq %rdi
#endif
ret
#ifdef USE_AVX2
.text
.p2align 6
/*
 * sha256_transform_8way_core_avx2
 *
 * AVX2 core of the 8-way transform.  Entered by a jump with %rsp pointing
 * at the 32-byte message-schedule slots (slots 0-15 filled in) and %rdi at
 * the eight 32-byte state slots; leaves by jumping to
 * sha256_transform_8way_finish with the working variables in
 *   %ymm7, %ymm5, %ymm4, %ymm3, %ymm0, %ymm8, %ymm9, %ymm10.
 *
 * The unrolled macro sequences are generated with .irp; the resulting
 * instruction stream is the same as writing each invocation out by hand.
 */
sha256_transform_8way_core_avx2:
/* %rax -> slot 16; slots 14 and 15 are carried in %ymm3 / %ymm7 */
leaq 8*64(%rsp), %rax
vmovdqa -2*32(%rax), %ymm3
vmovdqa -1*32(%rax), %ymm7
/* fill slots 16-63, two per macro invocation */
.irp w, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46
sha256_avx2_extend_doubleround \w
.endr
/* load the state into the working registers */
vmovdqu 0*32(%rdi), %ymm7
vmovdqu 1*32(%rdi), %ymm5
vmovdqu 2*32(%rdi), %ymm4
vmovdqu 3*32(%rdi), %ymm3
vmovdqu 4*32(%rdi), %ymm0
vmovdqu 5*32(%rdi), %ymm8
vmovdqu 6*32(%rdi), %ymm9
vmovdqu 7*32(%rdi), %ymm10
/* %rax -> slot 0 of the schedule, %rcx -> round constants */
movq %rsp, %rax
leaq sha256_8k(%rip), %rcx
/* 64 rounds, four per macro invocation */
.irp r, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
sha256_avx2_main_quadround \r
.endr
jmp sha256_transform_8way_finish
/*
 * p2bswap_avx2_rsi_rsp i
 *
 * Copies the 32-byte slots i and i+1 from (%rsi) (unaligned loads) to
 * (%rsp) (aligned stores), reversing the byte order of every 32-bit word
 * on the way.  Clobbers %ymm0-%ymm3.
 */
.macro p2bswap_avx2_rsi_rsp i
vmovdqu \i*32(%rsi), %ymm0
vmovdqu (\i+1)*32(%rsi), %ymm2
/* swap the two 16-bit halves of every 32-bit word */
vpshuflw $0xb1, %ymm0, %ymm0
vpshuflw $0xb1, %ymm2, %ymm2
vpshufhw $0xb1, %ymm0, %ymm0
vpshufhw $0xb1, %ymm2, %ymm2
/* swap the two bytes of every 16-bit half: (x >> 8) ^ (x << 8) */
vpsrlw $8, %ymm0, %ymm1
vpsrlw $8, %ymm2, %ymm3
vpsllw $8, %ymm0, %ymm0
vpsllw $8, %ymm2, %ymm2
vpxor %ymm1, %ymm0, %ymm0
vpxor %ymm3, %ymm2, %ymm2
vmovdqa %ymm0, \i*32(%rsp)
vmovdqa %ymm2, (\i+1)*32(%rsp)
.endm
.text
.p2align 6
.globl sha256_transform_8way
.globl _sha256_transform_8way
/*
 * sha256_transform_8way  (also exported with a leading underscore)
 *
 * Register arguments as used below (on _WIN64 / __CYGWIN__ they arrive in
 * %rcx, %rdx, %r8 and are moved into place by the prologue):
 *   %rdi  state: eight 32-byte slots, read and written with unaligned
 *         moves; updated in place.
 *   %rsi  input block: sixteen 32-byte slots, read with unaligned loads.
 *   %rdx  if non-zero, every 32-bit input word is byte-swapped while it
 *         is copied.
 *
 * The input is copied to slots 0-15 of a 128-byte-aligned scratch area on
 * the stack, then control jumps to sha256_transform_8way_core_avx2, which
 * comes back at sha256_transform_8way_finish.
 *
 * %r8 holds the caller's stack pointer for the whole call.
 *
 * The routine executes vzeroupper before returning: it leaves the upper
 * halves of the ymm registers dirty, and callers that run legacy SSE code
 * would otherwise pay AVX/SSE transition penalties.
 */
sha256_transform_8way:
_sha256_transform_8way:
#if defined(_WIN64) || defined(__CYGWIN__)
/* save %rdi, %rsi and %xmm6-%xmm11, then move the arguments into place */
pushq %rdi
subq $96, %rsp
vmovdqa %xmm6, 0(%rsp)
vmovdqa %xmm7, 16(%rsp)
vmovdqa %xmm8, 32(%rsp)
vmovdqa %xmm9, 48(%rsp)
vmovdqa %xmm10, 64(%rsp)
vmovdqa %xmm11, 80(%rsp)
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
#endif
/* remember the stack pointer, then carve out an aligned scratch area */
movq %rsp, %r8
subq $64*32, %rsp
andq $-128, %rsp
testq %rdx, %rdx
jnz sha256_transform_8way_swap
/* plain copy of the 16 input slots, eight at a time */
vmovdqu 0*32(%rsi), %ymm0
vmovdqu 1*32(%rsi), %ymm1
vmovdqu 2*32(%rsi), %ymm2
vmovdqu 3*32(%rsi), %ymm3
vmovdqu 4*32(%rsi), %ymm4
vmovdqu 5*32(%rsi), %ymm5
vmovdqu 6*32(%rsi), %ymm6
vmovdqu 7*32(%rsi), %ymm7
vmovdqa %ymm0, 0*32(%rsp)
vmovdqa %ymm1, 1*32(%rsp)
vmovdqa %ymm2, 2*32(%rsp)
vmovdqa %ymm3, 3*32(%rsp)
vmovdqa %ymm4, 4*32(%rsp)
vmovdqa %ymm5, 5*32(%rsp)
vmovdqa %ymm6, 6*32(%rsp)
vmovdqa %ymm7, 7*32(%rsp)
vmovdqu 8*32(%rsi), %ymm0
vmovdqu 9*32(%rsi), %ymm1
vmovdqu 10*32(%rsi), %ymm2
vmovdqu 11*32(%rsi), %ymm3
vmovdqu 12*32(%rsi), %ymm4
vmovdqu 13*32(%rsi), %ymm5
vmovdqu 14*32(%rsi), %ymm6
vmovdqu 15*32(%rsi), %ymm7
vmovdqa %ymm0, 8*32(%rsp)
vmovdqa %ymm1, 9*32(%rsp)
vmovdqa %ymm2, 10*32(%rsp)
vmovdqa %ymm3, 11*32(%rsp)
vmovdqa %ymm4, 12*32(%rsp)
vmovdqa %ymm5, 13*32(%rsp)
vmovdqa %ymm6, 14*32(%rsp)
vmovdqa %ymm7, 15*32(%rsp)
jmp sha256_transform_8way_core_avx2
.p2align 6
/* byte-swapping copy of the 16 input slots, two per macro invocation */
sha256_transform_8way_swap:
p2bswap_avx2_rsi_rsp 0
p2bswap_avx2_rsi_rsp 2
p2bswap_avx2_rsi_rsp 4
p2bswap_avx2_rsi_rsp 6
p2bswap_avx2_rsi_rsp 8
p2bswap_avx2_rsi_rsp 10
p2bswap_avx2_rsi_rsp 12
p2bswap_avx2_rsi_rsp 14
jmp sha256_transform_8way_core_avx2
.p2align 6
/*
 * Tail of the 8-way transform.  Expects the working variables in
 * %ymm7, %ymm5, %ymm4, %ymm3, %ymm0, %ymm8, %ymm9, %ymm10 (in state-slot
 * order) and %r8 = stack pointer saved at entry.
 */
sha256_transform_8way_finish:
/* add the previous state to the working variables, four slots at a time */
vmovdqu 0*32(%rdi), %ymm2
vmovdqu 1*32(%rdi), %ymm6
vmovdqu 2*32(%rdi), %ymm11
vmovdqu 3*32(%rdi), %ymm1
vpaddd %ymm2, %ymm7, %ymm7
vpaddd %ymm6, %ymm5, %ymm5
vpaddd %ymm11, %ymm4, %ymm4
vpaddd %ymm1, %ymm3, %ymm3
vmovdqu 4*32(%rdi), %ymm2
vmovdqu 5*32(%rdi), %ymm6
vmovdqu 6*32(%rdi), %ymm11
vmovdqu 7*32(%rdi), %ymm1
vpaddd %ymm2, %ymm0, %ymm0
vpaddd %ymm6, %ymm8, %ymm8
vpaddd %ymm11, %ymm9, %ymm9
vpaddd %ymm1, %ymm10, %ymm10
/* write the new state back */
vmovdqu %ymm7, 0*32(%rdi)
vmovdqu %ymm5, 1*32(%rdi)
vmovdqu %ymm4, 2*32(%rdi)
vmovdqu %ymm3, 3*32(%rdi)
vmovdqu %ymm0, 4*32(%rdi)
vmovdqu %ymm8, 5*32(%rdi)
vmovdqu %ymm9, 6*32(%rdi)
vmovdqu %ymm10, 7*32(%rdi)
/* drop the scratch area */
movq %r8, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
/* undo the prologue in reverse order */
popq %rsi
vmovdqa 0(%rsp), %xmm6
vmovdqa 16(%rsp), %xmm7
vmovdqa 32(%rsp), %xmm8
vmovdqa 48(%rsp), %xmm9
vmovdqa 64(%rsp), %xmm10
vmovdqa 80(%rsp), %xmm11
addq $96, %rsp
popq %rdi
#endif
/*
 * Clear the upper ymm halves before handing control back.  Only bits
 * 128 and above are zeroed, so the %xmm6-%xmm11 values restored above
 * are unaffected.
 */
vzeroupper
ret
#endif /* USE_AVX2 */
.data
.p2align 3
/*
 * 8-byte pointer that sha256d_ms_4way jumps through.  It is zero here; the
 * code that stores the address of an implementation into it is not part
 * of this section of the file and must run before the first call.
 */
sha256d_ms_4way_addr:
.quad 0x0
.text
.p2align 6
.globl sha256d_ms_4way
.globl _sha256d_ms_4way
/*
 * sha256d_ms_4way  (also exported with a leading underscore)
 *
 * Thin trampoline: tail-jumps to whatever address is stored in
 * sha256d_ms_4way_addr, leaving all argument registers untouched.
 */
sha256d_ms_4way:
_sha256d_ms_4way:
jmp *sha256d_ms_4way_addr(%rip)
.p2align 6
/*
 * sha256d_ms_4way_sse2
 *
 * SSE2 implementation behind sha256d_ms_4way.  Register arguments as used
 * below (on _WIN64 / __CYGWIN__ they arrive in %rcx, %rdx, %r8, %r9 and
 * are moved into place by the prologue); all slots are 16 bytes wide and
 * accessed with aligned moves:
 *   %rdi  output; only the slot at 112(%rdi) is written (in the finish
 *         code).
 *   %rsi  schedule area for the first pass; word 3 is read, and slots
 *         from 16 upwards are modified in place.  Slots 18-20, 22-24, 30
 *         and 31 are saved first and put back after the first pass.
 *   %rdx  eight slots added to the result of the first pass.
 *   %rcx  eight slots used as the working state at the start of the first
 *         pass, which enters the round sequence at round 3.
 *
 * The routine makes two passes over the same extend / main-round code:
 *   pass 1 runs on the schedule at (%rsi);
 *   pass 2 runs on a schedule built on the stack from the pass-1 result,
 *          starting from the constants at sha256_4h.
 * Which pass is running is encoded in the zero flag: it is clear after
 * the stack adjustment at entry, and set by "cmpq %rax, %rax" before pass
 * 2.  The two jz instructions below test it much later, so nothing in
 * between may modify EFLAGS.  NOTE(review): the sha256_sse2_* macros
 * expanded in between are defined outside this section; this relies on
 * them containing only SSE instructions.
 *
 * Stack frame (offsets from %rsp after the adjustment):
 *   0, 16, 32          spilled working variables (f, g, h as used by
 *                      sha256_sse2_main_round_red)
 *   5*16 .. 18*16      pass 1: saved copies of the (%rsi) slots
 *   48 .. 48+63*16     pass 2: message schedule
 */
sha256d_ms_4way_sse2:
#if defined(_WIN64) || defined(__CYGWIN__)
/* save %rdi, %rsi, %xmm6, %xmm7, then move the arguments into place */
pushq %rdi
subq $32, %rsp
movdqa %xmm6, 0(%rsp)
movdqa %xmm7, 16(%rsp)
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
movq %r9, %rcx
#endif
/* allocate the frame; the non-zero result also leaves ZF clear (pass 1) */
subq $8+67*16, %rsp
/* %rax -> slot 16 of the schedule at (%rsi) */
leaq 256(%rsi), %rax
/*
 * Pass 1, slots 18-31.  In the comments below
 *   s0(x) = (x >> 3) ^ rotr(x, 7) ^ rotr(x, 18)
 *   s1(x) = (x >> 10) ^ rotr(x, 17) ^ rotr(x, 19)
 * and a primed slot number means the value just written by this code.
 */
sha256d_ms_4way_sse2_extend_loop1:
/* slot 18' = slot 18 + s0(word 3); slot 19' = slot 19 + word 3 */
movdqa 3*16(%rsi), %xmm0
movdqa 2*16(%rax), %xmm3
movdqa 3*16(%rax), %xmm7
movdqa %xmm3, 5*16(%rsp)
movdqa %xmm7, 6*16(%rsp)
movdqa %xmm0, %xmm2
paddd %xmm0, %xmm7
psrld $3, %xmm0
movdqa %xmm0, %xmm1
pslld $14, %xmm2
psrld $4, %xmm1
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
psrld $11, %xmm1
pslld $11, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
paddd %xmm0, %xmm3
movdqa %xmm3, 2*16(%rax)
movdqa %xmm7, 3*16(%rax)
/* slot 20' = slot 20 + s1(18'); slot 21' = s1(19') */
movdqa 4*16(%rax), %xmm0
movdqa %xmm0, 7*16(%rsp)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
movdqa %xmm3, 4*16(%rax)
movdqa %xmm7, 5*16(%rax)
/* slot 22' = slot 22 + s1(20'); slot 23' = slot 23 + s1(21') */
movdqa 6*16(%rax), %xmm0
movdqa 7*16(%rax), %xmm4
movdqa %xmm0, 9*16(%rsp)
movdqa %xmm4, 10*16(%rsp)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 6*16(%rax)
movdqa %xmm7, 7*16(%rax)
/* slot 24' = slot 24 + s1(22'); slot 25' = 18' + s1(23') */
movdqa 8*16(%rax), %xmm0
movdqa 2*16(%rax), %xmm4
movdqa %xmm0, 11*16(%rsp)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 8*16(%rax)
movdqa %xmm7, 9*16(%rax)
/* slot 26' = 19' + s1(24'); slot 27' = 20' + s1(25') */
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd 3*16(%rax), %xmm3
paddd 4*16(%rax), %xmm7
movdqa %xmm3, 10*16(%rax)
movdqa %xmm7, 11*16(%rax)
/* slot 28' = 21' + s1(26'); slot 29' = 22' + s1(27') */
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd 5*16(%rax), %xmm3
paddd 6*16(%rax), %xmm7
movdqa %xmm3, 12*16(%rax)
movdqa %xmm7, 13*16(%rax)
/* slot 30' = slot 30 + 23' + s1(28'); slot 31' = slot 31 + 24' + s1(29') */
movdqa 14*16(%rax), %xmm0
movdqa 15*16(%rax), %xmm4
movdqa %xmm0, 17*16(%rsp)
movdqa %xmm4, 18*16(%rsp)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
paddd 7*16(%rax), %xmm0
paddd 8*16(%rax), %xmm4
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 14*16(%rax)
movdqa %xmm7, 15*16(%rax)
/*
 * Shared by both passes: slots 32 upwards, relative to %rax = slot 16.
 * The macro index is relative to %rax, hence 16 here means slot 32.
 */
sha256d_ms_4way_sse2_extend_loop2:
sha256_sse2_extend_doubleround 16
sha256_sse2_extend_doubleround 18
sha256_sse2_extend_doubleround 20
sha256_sse2_extend_doubleround 22
sha256_sse2_extend_doubleround 24
sha256_sse2_extend_doubleround 26
sha256_sse2_extend_doubleround 28
sha256_sse2_extend_doubleround 30
sha256_sse2_extend_doubleround 32
sha256_sse2_extend_doubleround 34
sha256_sse2_extend_doubleround 36
sha256_sse2_extend_doubleround 38
sha256_sse2_extend_doubleround 40
sha256_sse2_extend_doubleround 42
/* ZF set means pass 2, which needs only one more slot (see the coda) */
jz sha256d_ms_4way_sse2_extend_coda2
sha256_sse2_extend_doubleround 44
sha256_sse2_extend_doubleround 46
/* pass 1: load the working state from (%rcx); three values are spilled */
movdqa 0(%rcx), %xmm3
movdqa 16(%rcx), %xmm0
movdqa 32(%rcx), %xmm1
movdqa 48(%rcx), %xmm2
movdqa 64(%rcx), %xmm6
movdqa 80(%rcx), %xmm7
movdqa 96(%rcx), %xmm5
movdqa 112(%rcx), %xmm4
movdqa %xmm1, 0(%rsp)
movdqa %xmm2, 16(%rsp)
movdqa %xmm6, 32(%rsp)
/* %rax -> slot 0 of the schedule, %rcx -> round constants */
movq %rsi, %rax
leaq sha256_4k(%rip), %rcx
/* pass 1 skips rounds 0-2 */
jmp sha256d_ms_4way_sse2_main_loop1
/* pass 2 enters here and runs rounds 0-2 as well */
sha256d_ms_4way_sse2_main_loop2:
sha256_sse2_main_round 0
sha256_sse2_main_round 1
sha256_sse2_main_round 2
sha256d_ms_4way_sse2_main_loop1:
sha256_sse2_main_round 3
sha256_sse2_main_quadround 4
sha256_sse2_main_quadround 8
sha256_sse2_main_quadround 12
sha256_sse2_main_quadround 16
sha256_sse2_main_quadround 20
sha256_sse2_main_quadround 24
sha256_sse2_main_quadround 28
sha256_sse2_main_quadround 32
sha256_sse2_main_quadround 36
sha256_sse2_main_quadround 40
sha256_sse2_main_quadround 44
sha256_sse2_main_quadround 48
sha256_sse2_main_quadround 52
sha256_sse2_main_round 56
/* ZF set means pass 2: finish with the reduced rounds instead */
jz sha256d_ms_4way_sse2_finish
sha256_sse2_main_round 57
sha256_sse2_main_round 58
sha256_sse2_main_round 59
sha256_sse2_main_quadround 60
/* put the saved slots 18-20, 22-24, 30 and 31 back into (%rsi) */
movdqa 5*16(%rsp), %xmm1
movdqa 6*16(%rsp), %xmm2
movdqa 7*16(%rsp), %xmm6
movdqa %xmm1, 18*16(%rsi)
movdqa %xmm2, 19*16(%rsi)
movdqa %xmm6, 20*16(%rsi)
movdqa 9*16(%rsp), %xmm1
movdqa 10*16(%rsp), %xmm2
movdqa 11*16(%rsp), %xmm6
movdqa %xmm1, 22*16(%rsi)
movdqa %xmm2, 23*16(%rsi)
movdqa %xmm6, 24*16(%rsi)
movdqa 17*16(%rsp), %xmm1
movdqa 18*16(%rsp), %xmm2
movdqa %xmm1, 30*16(%rsi)
movdqa %xmm2, 31*16(%rsi)
/* reload the spilled variables and add the eight slots at (%rdx) */
movdqa 0(%rsp), %xmm1
movdqa 16(%rsp), %xmm2
movdqa 32(%rsp), %xmm6
paddd 0(%rdx), %xmm7
paddd 16(%rdx), %xmm5
paddd 32(%rdx), %xmm4
paddd 48(%rdx), %xmm3
paddd 64(%rdx), %xmm0
paddd 80(%rdx), %xmm1
paddd 96(%rdx), %xmm2
paddd 112(%rdx), %xmm6
/* the eight sums become slots 0-7 of the pass-2 schedule at 48(%rsp) */
movdqa %xmm7, 48+0(%rsp)
movdqa %xmm5, 48+16(%rsp)
movdqa %xmm4, 48+32(%rsp)
movdqa %xmm3, 48+48(%rsp)
movdqa %xmm0, 48+64(%rsp)
movdqa %xmm1, 48+80(%rsp)
movdqa %xmm2, 48+96(%rsp)
movdqa %xmm6, 48+112(%rsp)
/*
 * Slots 8-15: 0x80000000 in slot 8, zeros in 9-14, 0x00000100 in slot 15.
 * Both constants come from one 64-bit immediate: pshufd 0x55 broadcasts
 * its high dword, pshufd 0x00 its low dword.
 */
pxor %xmm0, %xmm0
movq $0x8000000000000100, %rax
movd %rax, %xmm1
pshufd $0x55, %xmm1, %xmm2
pshufd $0x00, %xmm1, %xmm1
movdqa %xmm2, 48+128(%rsp)
movdqa %xmm0, 48+144(%rsp)
movdqa %xmm0, 48+160(%rsp)
movdqa %xmm0, 48+176(%rsp)
movdqa %xmm0, 48+192(%rsp)
movdqa %xmm0, 48+208(%rsp)
movdqa %xmm0, 48+224(%rsp)
movdqa %xmm1, 48+240(%rsp)
/* %rax -> slot 16 of the pass-2 schedule (48 + 16*16 = 19*16) */
leaq 19*16(%rsp), %rax
/* comparing a register with itself sets ZF: marks pass 2 for the jz's */
cmpq %rax, %rax
/*
 * Pass 2, slots 16-31 (offsets relative to %rax = slot 16).
 * Slot 16 = s0(slot 1) + slot 0
 * Slot 17 = s0(slot 2) + slot 1 + sha256d_4preext2_17
 * No s1 term is computed for these two.
 */
movdqa -15*16(%rax), %xmm0
movdqa -14*16(%rax), %xmm4
movdqa %xmm0, %xmm2
movdqa %xmm4, %xmm6
psrld $3, %xmm0
psrld $3, %xmm4
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm5
pslld $14, %xmm2
pslld $14, %xmm6
psrld $4, %xmm1
psrld $4, %xmm5
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
psrld $11, %xmm1
psrld $11, %xmm5
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
pslld $11, %xmm2
pslld $11, %xmm6
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
paddd -16*16(%rax), %xmm0
paddd -15*16(%rax), %xmm4
paddd sha256d_4preext2_17(%rip), %xmm4
movdqa %xmm0, %xmm3
movdqa %xmm4, %xmm7
movdqa %xmm3, 0*16(%rax)
movdqa %xmm7, 1*16(%rax)
/* slots 18-21 via the generic macro */
sha256_sse2_extend_doubleround 2
sha256_sse2_extend_doubleround 4
/*
 * Slots 22 and 23.
 * Slot 22 = s0(slot 7) + slot 6 + slot 15 + s1(%xmm3)
 * Slot 23 = sha256d_4preext2_23 + slot 7 + slot 16 + s1(%xmm7)
 * NOTE(review): %xmm3 / %xmm7 are whatever the preceding macro left there,
 * presumably slots 20 and 21; the macro body is outside this section.
 */
movdqa -9*16(%rax), %xmm0
movdqa sha256d_4preext2_23(%rip), %xmm4
movdqa %xmm0, %xmm2
psrld $3, %xmm0
movdqa %xmm0, %xmm1
pslld $14, %xmm2
psrld $4, %xmm1
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
psrld $11, %xmm1
pslld $11, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
paddd -10*16(%rax), %xmm0
paddd -9*16(%rax), %xmm4
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
paddd -1*16(%rax), %xmm0
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
paddd 0*16(%rax), %xmm4
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 6*16(%rax)
movdqa %xmm7, 7*16(%rax)
/*
 * Slot 24 = sha256d_4preext2_24 + slot 17 + s1(slot 22)
 * Slot 25 = slot 18 + s1(slot 23)
 */
movdqa sha256d_4preext2_24(%rip), %xmm0
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
paddd 1*16(%rax), %xmm0
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd 2*16(%rax), %xmm7
movdqa %xmm3, 8*16(%rax)
movdqa %xmm7, 9*16(%rax)
/* slot 26 = slot 19 + s1(slot 24); slot 27 = slot 20 + s1(slot 25) */
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd 3*16(%rax), %xmm3
paddd 4*16(%rax), %xmm7
movdqa %xmm3, 10*16(%rax)
movdqa %xmm7, 11*16(%rax)
/* slot 28 = slot 21 + s1(slot 26); slot 29 = slot 22 + s1(slot 27) */
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd 5*16(%rax), %xmm3
paddd 6*16(%rax), %xmm7
movdqa %xmm3, 12*16(%rax)
movdqa %xmm7, 13*16(%rax)
/*
 * Slot 30 = sha256d_4preext2_30 + slot 23 + s1(slot 28)
 * Slot 31 = s0(slot 16) + slot 15 + slot 24 + s1(slot 29)
 */
movdqa sha256d_4preext2_30(%rip), %xmm0
movdqa 0*16(%rax), %xmm4
movdqa %xmm4, %xmm6
psrld $3, %xmm4
movdqa %xmm4, %xmm5
pslld $14, %xmm6
psrld $4, %xmm5
pxor %xmm5, %xmm4
pxor %xmm6, %xmm4
psrld $11, %xmm5
pslld $11, %xmm6
pxor %xmm5, %xmm4
pxor %xmm6, %xmm4
paddd -1*16(%rax), %xmm4
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
paddd 7*16(%rax), %xmm0
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
paddd 8*16(%rax), %xmm4
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 14*16(%rax)
movdqa %xmm7, 15*16(%rax)
/* continue with the shared code for slots 32 upwards (ZF still set) */
jmp sha256d_ms_4way_sse2_extend_loop2
/*
 * Pass 2 only: one more schedule slot (index 44 relative to %rax, i.e.
 * slot 60), then start the rounds from the constants at sha256_4h.
 */
sha256d_ms_4way_sse2_extend_coda2:
sha256_sse2_extend_round 44
movdqa sha256_4h+0(%rip), %xmm7
movdqa sha256_4h+16(%rip), %xmm5
movdqa sha256_4h+32(%rip), %xmm4
movdqa sha256_4h+48(%rip), %xmm3
movdqa sha256_4h+64(%rip), %xmm0
movdqa sha256_4h+80(%rip), %xmm1
movdqa sha256_4h+96(%rip), %xmm2
movdqa sha256_4h+112(%rip), %xmm6
/* three of the working variables live in the frame, not in registers */
movdqa %xmm1, 0(%rsp)
movdqa %xmm2, 16(%rsp)
movdqa %xmm6, 32(%rsp)
/* %rax -> slot 0 of the pass-2 schedule, %rcx -> round constants */
leaq 48(%rsp), %rax
leaq sha256_4k(%rip), %rcx
jmp sha256d_ms_4way_sse2_main_loop2
/*
 * sha256_sse2_main_round_red i, r7
 *
 * Reduced SHA-256 round: only the e-side of the state is advanced; the
 * Maj / S0 half of a full round is not computed.
 *
 * In:   %rax -> schedule slots, %rcx -> round constants (16-byte stride);
 *       %xmm0 = e, 0(%rsp) = f, 16(%rsp) = g, 32(%rsp) = h,
 *       \r7 = the value added in place of d.
 * Out:  %xmm0 = h + W[i] + K[i] + \r7 + Ch(e, f, g) + S1(e)
 *       0(%rsp) = old e, 16(%rsp) = old f, 32(%rsp) = old g.
 * Clobbers %xmm1, %xmm2, %xmm6; \r7 itself is not modified.
 */
.macro sha256_sse2_main_round_red i, r7
/* %xmm6 = W[i] + K[i] + h (+ \r7 a few lines further down) */
movdqa 16*\i(%rax), %xmm6
paddd 16*\i(%rcx), %xmm6
paddd 32(%rsp), %xmm6
/* Ch(e, f, g) = (~e & g) ^ (e & f), rotating the spilled f/g/h as we go */
movdqa %xmm0, %xmm1
movdqa 16(%rsp), %xmm2
paddd \r7, %xmm6
pandn %xmm2, %xmm1
movdqa %xmm2, 32(%rsp)
movdqa 0(%rsp), %xmm2
movdqa %xmm2, 16(%rsp)
pand %xmm0, %xmm2
pxor %xmm2, %xmm1
movdqa %xmm0, 0(%rsp)
paddd %xmm1, %xmm6
/* S1(e) = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25), built from shifts */
movdqa %xmm0, %xmm1
psrld $6, %xmm0
movdqa %xmm0, %xmm2
pslld $7, %xmm1
psrld $5, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $14, %xmm1
psrld $14, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $5, %xmm1
pxor %xmm1, %xmm0
/* new e */
paddd %xmm6, %xmm0
.endm
/*
 * Tail of sha256d_ms_4way_sse2, reached from its second pass after round
 * 56.  Runs rounds 57-60 in reduced form, adds the slot at sha256_4h+112
 * to the resulting %xmm0 and stores that single 16-byte slot at
 * 112(%rdi); no other output slot is written.  Then releases the frame
 * and returns to the caller of sha256d_ms_4way.
 */
sha256d_ms_4way_sse2_finish:
sha256_sse2_main_round_red 57, %xmm3
sha256_sse2_main_round_red 58, %xmm4
sha256_sse2_main_round_red 59, %xmm5
sha256_sse2_main_round_red 60, %xmm7
paddd sha256_4h+112(%rip), %xmm0
movdqa %xmm0, 112(%rdi)
/* must match the subq in the prologue of sha256d_ms_4way_sse2 */
addq $8+67*16, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
/* undo the prologue in reverse order */
popq %rsi
movdqa 0(%rsp), %xmm6
movdqa 16(%rsp), %xmm7
addq $32, %rsp
popq %rdi
#endif
ret
#if defined(USE_AVX)
/*
 * sha256d_ms_4way_avx
 *
 * AVX (128-bit, four 32-bit lanes per vector) variant of the 4-way
 * double-SHA-256 routine.  sha256_use_4way installs its address in
 * sha256d_ms_4way_addr when AVX is usable.
 *
 * Register use after the optional Win64 argument remap:
 *   %rdi  output; only the 16-byte vector at 112(%rdi) is written here
 *   %rsi  array of 16-byte slots; slots 3 and 16..31 are read directly
 *         here, slots 18..31 are overwritten during the first pass and
 *         slots 18-20, 22-24, 30, 31 are restored afterwards
 *   %rdx  eight 16-byte vectors added to the state after the first pass
 *   %rcx  eight 16-byte vectors loaded as the state for the first pass
 *
 * Control flow: the extend/main code is executed twice.  The first pass
 * falls through both "jz" instructions; before the second pass
 * "cmpq %rax, %rax" sets ZF so that the same "jz" instructions branch to
 * ..._extend_coda2 and ..._finish.  This relies on no instruction in
 * between altering the flags (vector instructions do not; the macros
 * sha256_avx_extend_* / sha256_avx_main_* are defined outside this chunk
 * and are assumed to contain only such instructions -- TODO confirm).
 *
 * Shift/XOR idioms used throughout (per 32-bit lane):
 *   sigma0(x) = rotr(x,7)  ^ rotr(x,18) ^ (x >> 3)
 *   sigma1(x) = rotr(x,17) ^ rotr(x,19) ^ (x >> 10)
 * each rotate being assembled from a left and a right shift.
 */
.p2align 6
sha256d_ms_4way_avx:
#if defined(_WIN64) || defined(__CYGWIN__)
/*
 * Win64/Cygwin: preserve %rdi, %rsi and %xmm6-%xmm10, then move the
 * arguments from %rcx, %rdx, %r8, %r9 into %rdi, %rsi, %rdx, %rcx.
 */
pushq %rdi
subq $80, %rsp
movdqa %xmm6, 0(%rsp)
movdqa %xmm7, 16(%rsp)
movdqa %xmm8, 32(%rsp)
movdqa %xmm9, 48(%rsp)
movdqa %xmm10, 64(%rsp)
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
movq %r9, %rcx
#endif
/*
 * 1032 = 64*16 + 8 bytes of scratch; the extra 8 makes %rsp 16-byte
 * aligned for vmovdqa/movdqa given the return address (and, on Win64,
 * the two pushes plus 80 bytes) already on the stack.  The result is
 * non-zero, so ZF is clear for the first pass.
 */
subq $1032, %rsp
/* %rax -> slot 16 of the %rsi array (leaq leaves the flags alone). */
leaq 256(%rsi), %rax
sha256d_ms_4way_avx_extend_loop1:
/*
 * First-pass partial extension.  Slots 2..15 relative to %rax are
 * updated in place; the original contents of slots 2, 3, 4, 6, 7, 8,
 * 14 and 15 are first copied to the same offsets from %rsp so they can
 * be written back after the first pass.
 */
/* slot 2 += sigma0(slot 3 of %rsi); slot 3 += slot 3 of %rsi */
vmovdqa 3*16(%rsi), %xmm0
vmovdqa 2*16(%rax), %xmm3
vmovdqa 3*16(%rax), %xmm7
vmovdqa %xmm3, 2*16(%rsp)
vmovdqa %xmm7, 3*16(%rsp)
vpaddd %xmm0, %xmm7, %xmm7
vpslld $14, %xmm0, %xmm2
vpsrld $3, %xmm0, %xmm0
vpsrld $4, %xmm0, %xmm1
vpxor %xmm1, %xmm0, %xmm0
vpxor %xmm2, %xmm0, %xmm0
vpsrld $11, %xmm1, %xmm1
vpslld $11, %xmm2, %xmm2
vpxor %xmm1, %xmm0, %xmm0
vpxor %xmm2, %xmm0, %xmm0
vpaddd %xmm0, %xmm3, %xmm3
vmovdqa %xmm3, 2*16(%rax)
vmovdqa %xmm7, 3*16(%rax)
/* slot 4 = sigma1(slot 2) + old slot 4; slot 5 = sigma1(slot 3) */
vmovdqa 4*16(%rax), %xmm0
vmovdqa %xmm0, 4*16(%rsp)
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vmovdqa %xmm3, 4*16(%rax)
vmovdqa %xmm7, 5*16(%rax)
/* slot 6 = sigma1(slot 4) + old slot 6; slot 7 = sigma1(slot 5) + old slot 7 */
vmovdqa 6*16(%rax), %xmm0
vmovdqa 7*16(%rax), %xmm4
vmovdqa %xmm0, 6*16(%rsp)
vmovdqa %xmm4, 7*16(%rsp)
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 6*16(%rax)
vmovdqa %xmm7, 7*16(%rax)
/* slot 8 = sigma1(slot 6) + old slot 8; slot 9 = sigma1(slot 7) + new slot 2 */
vmovdqa 8*16(%rax), %xmm0
vmovdqa 2*16(%rax), %xmm4
vmovdqa %xmm0, 8*16(%rsp)
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 8*16(%rax)
vmovdqa %xmm7, 9*16(%rax)
/* slot 10 = sigma1(slot 8) + slot 3; slot 11 = sigma1(slot 9) + slot 4 */
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd 3*16(%rax), %xmm3, %xmm3
vpaddd 4*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 10*16(%rax)
vmovdqa %xmm7, 11*16(%rax)
/* slot 12 = sigma1(slot 10) + slot 5; slot 13 = sigma1(slot 11) + slot 6 */
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd 5*16(%rax), %xmm3, %xmm3
vpaddd 6*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 12*16(%rax)
vmovdqa %xmm7, 13*16(%rax)
/*
 * slot 14 = sigma1(slot 12) + old slot 14 + slot 7
 * slot 15 = sigma1(slot 13) + old slot 15 + slot 8
 */
vmovdqa 14*16(%rax), %xmm0
vmovdqa 15*16(%rax), %xmm4
vmovdqa %xmm0, 14*16(%rsp)
vmovdqa %xmm4, 15*16(%rsp)
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpaddd 7*16(%rax), %xmm0, %xmm0
vpaddd 8*16(%rax), %xmm4, %xmm4
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 14*16(%rax)
vmovdqa %xmm7, 15*16(%rax)
sha256d_ms_4way_avx_extend_loop2:
/*
 * Shared by both passes (entered by fall-through on the first pass and
 * by the jmp at the end of the second-pass set-up).  The macro is
 * defined outside this chunk.
 */
sha256_avx_extend_doubleround 16
sha256_avx_extend_doubleround 18
sha256_avx_extend_doubleround 20
sha256_avx_extend_doubleround 22
sha256_avx_extend_doubleround 24
sha256_avx_extend_doubleround 26
sha256_avx_extend_doubleround 28
sha256_avx_extend_doubleround 30
sha256_avx_extend_doubleround 32
sha256_avx_extend_doubleround 34
sha256_avx_extend_doubleround 36
sha256_avx_extend_doubleround 38
sha256_avx_extend_doubleround 40
sha256_avx_extend_doubleround 42
/* ZF is set only on the second pass (see cmpq below). */
jz sha256d_ms_4way_avx_extend_coda2
sha256_avx_extend_doubleround 44
sha256_avx_extend_doubleround 46
/*
 * First pass: load the eight state vectors supplied through %rcx, point
 * %rax at the %rsi array and %rcx at the sha256_4k table, then enter the
 * main loop at round 3 (rounds 0-2 are skipped on this pass).
 */
movdqa 0(%rcx), %xmm7
movdqa 16(%rcx), %xmm8
movdqa 32(%rcx), %xmm9
movdqa 48(%rcx), %xmm10
movdqa 64(%rcx), %xmm0
movdqa 80(%rcx), %xmm5
movdqa 96(%rcx), %xmm4
movdqa 112(%rcx), %xmm3
movq %rsi, %rax
leaq sha256_4k(%rip), %rcx
jmp sha256d_ms_4way_avx_main_loop1
sha256d_ms_4way_avx_main_loop2:
/* Second-pass entry: rounds 0-2 are executed here. */
sha256_avx_main_round 0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7
sha256_avx_main_round 1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3
sha256_avx_main_round 2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4
sha256d_ms_4way_avx_main_loop1:
/* First-pass entry: rounds 3..56 are common to both passes. */
sha256_avx_main_round 3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5
sha256_avx_main_quadround 4
sha256_avx_main_quadround 8
sha256_avx_main_quadround 12
sha256_avx_main_quadround 16
sha256_avx_main_quadround 20
sha256_avx_main_quadround 24
sha256_avx_main_quadround 28
sha256_avx_main_quadround 32
sha256_avx_main_quadround 36
sha256_avx_main_quadround 40
sha256_avx_main_quadround 44
sha256_avx_main_quadround 48
sha256_avx_main_quadround 52
sha256_avx_main_round 56, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7
/* Second pass (ZF set): finish with the reduced rounds 57-60. */
jz sha256d_ms_4way_avx_finish
sha256_avx_main_round 57, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3
sha256_avx_main_round 58, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4
sha256_avx_main_round 59, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5
sha256_avx_main_quadround 60
/*
 * Write the values saved in extend_loop1 back to slots 18-20, 22-24,
 * 30 and 31 of the %rsi array.
 */
movdqa 2*16(%rsp), %xmm1
movdqa 3*16(%rsp), %xmm2
movdqa 4*16(%rsp), %xmm6
movdqa %xmm1, 18*16(%rsi)
movdqa %xmm2, 19*16(%rsi)
movdqa %xmm6, 20*16(%rsi)
movdqa 6*16(%rsp), %xmm1
movdqa 7*16(%rsp), %xmm2
movdqa 8*16(%rsp), %xmm6
movdqa %xmm1, 22*16(%rsi)
movdqa %xmm2, 23*16(%rsi)
movdqa %xmm6, 24*16(%rsi)
movdqa 14*16(%rsp), %xmm1
movdqa 15*16(%rsp), %xmm2
movdqa %xmm1, 30*16(%rsi)
movdqa %xmm2, 31*16(%rsi)
/*
 * Add the eight vectors at (%rdx) to the working state and store the
 * sums as slots 0..7 of the second-pass input block on the stack.
 */
paddd 0(%rdx), %xmm7
paddd 16(%rdx), %xmm5
paddd 32(%rdx), %xmm4
paddd 48(%rdx), %xmm3
paddd 64(%rdx), %xmm0
paddd 80(%rdx), %xmm8
paddd 96(%rdx), %xmm9
paddd 112(%rdx), %xmm10
movdqa %xmm7, 0(%rsp)
movdqa %xmm5, 16(%rsp)
movdqa %xmm4, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm0, 64(%rsp)
movdqa %xmm8, 80(%rsp)
movdqa %xmm9, 96(%rsp)
movdqa %xmm10, 112(%rsp)
/*
 * Fill slots 8..15: slot 8 = 0x80000000 in every lane, slots 9..14 = 0,
 * slot 15 = 0x00000100 in every lane (SHA-256 padding for a 256-bit
 * message).  Both constants come from one 64-bit immediate: pshufd 0x55
 * broadcasts its high dword, pshufd 0x00 its low dword.
 */
pxor %xmm0, %xmm0
movq $0x8000000000000100, %rax
movd %rax, %xmm1
pshufd $0x55, %xmm1, %xmm2
pshufd $0x00, %xmm1, %xmm1
movdqa %xmm2, 128(%rsp)
movdqa %xmm0, 144(%rsp)
movdqa %xmm0, 160(%rsp)
movdqa %xmm0, 176(%rsp)
movdqa %xmm0, 192(%rsp)
movdqa %xmm0, 208(%rsp)
movdqa %xmm0, 224(%rsp)
movdqa %xmm1, 240(%rsp)
/* %rax -> slot 16 of the stack block. */
leaq 256(%rsp), %rax
/* Always-equal compare: sets ZF to mark the second pass for both jz. */
cmpq %rax, %rax
/*
 * Second-pass partial extension using the sha256d_4preext2_* constants
 * (defined outside this chunk).
 * slot 0 = sigma0(slot -15) + slot -16
 * slot 1 = sigma0(slot -14) + slot -15 + sha256d_4preext2_17
 */
vmovdqa -15*16(%rax), %xmm0
vmovdqa -14*16(%rax), %xmm4
vpslld $14, %xmm0, %xmm2
vpslld $14, %xmm4, %xmm6
vpsrld $3, %xmm0, %xmm8
vpsrld $3, %xmm4, %xmm4
vpsrld $7, %xmm0, %xmm1
vpsrld $4, %xmm4, %xmm5
vpxor %xmm1, %xmm8, %xmm8
vpxor %xmm5, %xmm4, %xmm4
vpsrld $11, %xmm1, %xmm1
vpsrld $11, %xmm5, %xmm5
vpxor %xmm2, %xmm8, %xmm8
vpxor %xmm6, %xmm4, %xmm4
vpslld $11, %xmm2, %xmm2
vpslld $11, %xmm6, %xmm6
vpxor %xmm1, %xmm8, %xmm8
vpxor %xmm5, %xmm4, %xmm4
vpxor %xmm2, %xmm8, %xmm8
vpxor %xmm6, %xmm4, %xmm4
vpaddd %xmm0, %xmm4, %xmm4
vpaddd -16*16(%rax), %xmm8, %xmm3
vpaddd sha256d_4preext2_17(%rip), %xmm4, %xmm7
vmovdqa %xmm3, 0*16(%rax)
vmovdqa %xmm7, 1*16(%rax)
sha256_avx_extend_doubleround 2
sha256_avx_extend_doubleround 4
/*
 * xmm0 = sigma0(slot -9) + slot -10 + slot -1
 * xmm4 = slot -9 + sha256d_4preext2_23 + slot 0
 * slot 6 = sigma1(xmm3) + xmm0; slot 7 = sigma1(xmm7) + xmm4
 * (xmm3/xmm7 are whatever the preceding macro left there -- presumably
 * the two slots it just produced; verify against the macro definition.)
 */
vmovdqa -9*16(%rax), %xmm0
vpslld $14, %xmm0, %xmm2
vpsrld $3, %xmm0, %xmm8
vpsrld $7, %xmm0, %xmm1
vpxor %xmm1, %xmm8, %xmm8
vpxor %xmm2, %xmm8, %xmm8
vpsrld $11, %xmm1, %xmm1
vpslld $11, %xmm2, %xmm2
vpxor %xmm1, %xmm8, %xmm8
vpxor %xmm2, %xmm8, %xmm8
vpaddd sha256d_4preext2_23(%rip), %xmm0, %xmm4
vpaddd -10*16(%rax), %xmm8, %xmm0
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpaddd -1*16(%rax), %xmm0, %xmm0
vpaddd 0*16(%rax), %xmm4, %xmm4
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 6*16(%rax)
vmovdqa %xmm7, 7*16(%rax)
/*
 * slot 8 = sigma1(slot 6) + sha256d_4preext2_24 + slot 1
 * slot 9 = sigma1(slot 7) + slot 2
 */
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd sha256d_4preext2_24(%rip), %xmm3, %xmm3
vpaddd 1*16(%rax), %xmm3, %xmm3
vpaddd 2*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 8*16(%rax)
vmovdqa %xmm7, 9*16(%rax)
/* slot 10 = sigma1(slot 8) + slot 3; slot 11 = sigma1(slot 9) + slot 4 */
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd 3*16(%rax), %xmm3, %xmm3
vpaddd 4*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 10*16(%rax)
vmovdqa %xmm7, 11*16(%rax)
/* slot 12 = sigma1(slot 10) + slot 5; slot 13 = sigma1(slot 11) + slot 6 */
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd 5*16(%rax), %xmm3, %xmm3
vpaddd 6*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 12*16(%rax)
vmovdqa %xmm7, 13*16(%rax)
/*
 * slot 14 = sigma1(slot 12) + sha256d_4preext2_30 + slot 7
 * slot 15 = sigma1(slot 13) + sigma0(slot 0) + slot -1 + slot 8
 */
vmovdqa sha256d_4preext2_30(%rip), %xmm0
vmovdqa 0*16(%rax), %xmm4
vpslld $14, %xmm4, %xmm6
vpsrld $3, %xmm4, %xmm4
vpsrld $4, %xmm4, %xmm5
vpxor %xmm5, %xmm4, %xmm4
vpxor %xmm6, %xmm4, %xmm4
vpsrld $11, %xmm5, %xmm5
vpslld $11, %xmm6, %xmm6
vpxor %xmm5, %xmm4, %xmm4
vpxor %xmm6, %xmm4, %xmm4
vpaddd -1*16(%rax), %xmm4, %xmm4
vpslld $13, %xmm3, %xmm2
vpslld $13, %xmm7, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpaddd 7*16(%rax), %xmm0, %xmm0
vpaddd 8*16(%rax), %xmm4, %xmm4
vpsrld $7, %xmm3, %xmm1
vpsrld $7, %xmm7, %xmm5
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpsrld $2, %xmm1, %xmm1
vpsrld $2, %xmm5, %xmm5
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpslld $2, %xmm2, %xmm2
vpslld $2, %xmm6, %xmm6
vpxor %xmm1, %xmm3, %xmm3
vpxor %xmm5, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 14*16(%rax)
vmovdqa %xmm7, 15*16(%rax)
/* Re-enter the shared extension code with ZF still set. */
jmp sha256d_ms_4way_avx_extend_loop2
sha256d_ms_4way_avx_extend_coda2:
/*
 * Second pass only: one more single extend round, then load the state
 * from sha256_4h, point %rax at the stack block and %rcx at sha256_4k,
 * and run the main loop from round 0.
 */
sha256_avx_extend_round 44
movdqa sha256_4h+0(%rip), %xmm7
movdqa sha256_4h+16(%rip), %xmm5
movdqa sha256_4h+32(%rip), %xmm4
movdqa sha256_4h+48(%rip), %xmm3
movdqa sha256_4h+64(%rip), %xmm0
movdqa sha256_4h+80(%rip), %xmm8
movdqa sha256_4h+96(%rip), %xmm9
movdqa sha256_4h+112(%rip), %xmm10
movq %rsp, %rax
leaq sha256_4k(%rip), %rcx
jmp sha256d_ms_4way_avx_main_loop2
/*
 * sha256_avx_main_round_red i, r0, r1, r2, r3, r4
 *
 * Reduced main round: only r0 receives a new value.
 *   r0 = r4 + old r0 + [16*i(%rax)] + [16*i(%rcx)]
 *        + ((~r3 & r1) ^ (r3 & r2))
 *        + (rotr(r3,6) ^ rotr(r3,11) ^ rotr(r3,25))
 * i.e. the SHA-256 Ch and Sigma1 terms, with the rotates built from
 * shift pairs.  Clobbers %xmm1, %xmm2, %xmm6; r1-r4 are left unchanged.
 */
.macro sha256_avx_main_round_red i, r0, r1, r2, r3, r4
vpaddd 16*\i(%rax), \r0, %xmm6
vpaddd 16*\i(%rcx), %xmm6, %xmm6
vpandn \r1, \r3, %xmm1
vpand \r3, \r2, %xmm2
vpxor %xmm2, %xmm1, %xmm1
vpaddd %xmm1, %xmm6, %xmm6
vpslld $7, \r3, %xmm1
vpsrld $6, \r3, \r0
vpsrld $5, \r0, %xmm2
vpxor %xmm1, \r0, \r0
vpxor %xmm2, \r0, \r0
vpslld $14, %xmm1, %xmm1
vpsrld $14, %xmm2, %xmm2
vpxor %xmm1, \r0, \r0
vpxor %xmm2, \r0, \r0
vpslld $5, %xmm1, %xmm1
vpxor %xmm1, \r0, \r0
vpaddd \r0, %xmm6, %xmm6
vpaddd %xmm6, \r4, \r0
.endm
sha256d_ms_4way_avx_finish:
/*
 * Second-pass tail: four reduced rounds, then add the vector at
 * sha256_4h+112 to %xmm10 and store that single vector at 112(%rdi).
 * No other output slot is written by this routine.
 */
sha256_avx_main_round_red 57, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4
sha256_avx_main_round_red 58, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5
sha256_avx_main_round_red 59, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7
sha256_avx_main_round_red 60, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3
paddd sha256_4h+112(%rip), %xmm10
movdqa %xmm10, 112(%rdi)
/* Release the scratch area and undo the Win64 prologue in reverse. */
addq $1032, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rsi
movdqa 0(%rsp), %xmm6
movdqa 16(%rsp), %xmm7
movdqa 32(%rsp), %xmm8
movdqa 48(%rsp), %xmm9
movdqa 64(%rsp), %xmm10
addq $80, %rsp
popq %rdi
#endif
ret
#endif /* USE_AVX */
#if defined(USE_XOP)
/*
 * sha256d_ms_4way_xop
 *
 * XOP variant of the 4-way double-SHA-256 routine.  It has the same
 * structure as the AVX variant in this file, but rotates are done with a
 * single vprotd (rotate left) instead of shift pairs.  sha256_use_4way
 * installs its address in sha256d_ms_4way_addr when XOP is reported.
 *
 * Register use after the optional Win64 argument remap:
 *   %rdi  output; only the 16-byte vector at 112(%rdi) is written here
 *   %rsi  array of 16-byte slots; slots 3 and 16..31 are read directly
 *         here, slots 18..31 are overwritten during the first pass and
 *         slots 18-20, 22-24, 30, 31 are restored afterwards
 *   %rdx  eight 16-byte vectors added to the state after the first pass
 *   %rcx  eight 16-byte vectors loaded as the state for the first pass
 *
 * Control flow: the extend/main code runs twice.  The first pass falls
 * through both "jz" instructions; "cmpq %rax, %rax" sets ZF before the
 * second pass so the same "jz" instructions branch to ..._extend_coda2
 * and ..._finish.  This relies on nothing in between altering the flags
 * (the sha256_xop_* macros are defined outside this chunk and are
 * assumed to contain only vector instructions -- TODO confirm).
 *
 * Rotate idioms (vprotd rotates left, per 32-bit lane):
 *   sigma0(x) = rotl(x,25) ^ rotl(x,14) ^ (x >> 3)
 *             = rotr(x,7)  ^ rotr(x,18) ^ (x >> 3)
 *   sigma1(x) = rotl(x,15) ^ rotl(x,13) ^ (x >> 10)
 *             = rotr(x,17) ^ rotr(x,19) ^ (x >> 10)
 */
.p2align 6
sha256d_ms_4way_xop:
#if defined(_WIN64) || defined(__CYGWIN__)
/*
 * Win64/Cygwin: preserve %rdi, %rsi and %xmm6-%xmm10, then move the
 * arguments from %rcx, %rdx, %r8, %r9 into %rdi, %rsi, %rdx, %rcx.
 */
pushq %rdi
subq $80, %rsp
movdqa %xmm6, 0(%rsp)
movdqa %xmm7, 16(%rsp)
movdqa %xmm8, 32(%rsp)
movdqa %xmm9, 48(%rsp)
movdqa %xmm10, 64(%rsp)
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
movq %r9, %rcx
#endif
/*
 * 1032 = 64*16 + 8 bytes of scratch; the extra 8 keeps %rsp 16-byte
 * aligned for the aligned vector moves.  The non-zero result leaves ZF
 * clear for the first pass.
 */
subq $1032, %rsp
/* %rax -> slot 16 of the %rsi array (leaq leaves the flags alone). */
leaq 256(%rsi), %rax
sha256d_ms_4way_xop_extend_loop1:
/*
 * First-pass partial extension.  Slots 2..15 relative to %rax are
 * updated in place; the original contents of slots 2, 3, 4, 6, 7, 8,
 * 14 and 15 are first copied to the same offsets from %rsp so they can
 * be written back after the first pass.
 */
/* slot 2 += sigma0(slot 3 of %rsi); slot 3 += slot 3 of %rsi */
vmovdqa 3*16(%rsi), %xmm0
vmovdqa 2*16(%rax), %xmm3
vmovdqa 3*16(%rax), %xmm7
vmovdqa %xmm3, 2*16(%rsp)
vmovdqa %xmm7, 3*16(%rsp)
vpaddd %xmm0, %xmm7, %xmm7
vprotd $25, %xmm0, %xmm1
vprotd $14, %xmm0, %xmm2
vpsrld $3, %xmm0, %xmm0
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm2, %xmm0, %xmm0
vpaddd %xmm0, %xmm3, %xmm3
vmovdqa %xmm3, 2*16(%rax)
vmovdqa %xmm7, 3*16(%rax)
/* slot 4 = sigma1(slot 2) + old slot 4; slot 5 = sigma1(slot 3) */
vmovdqa 4*16(%rax), %xmm0
vmovdqa %xmm0, 4*16(%rsp)
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vmovdqa %xmm3, 4*16(%rax)
vmovdqa %xmm7, 5*16(%rax)
/* slot 6 = sigma1(slot 4) + old slot 6; slot 7 = sigma1(slot 5) + old slot 7 */
vmovdqa 6*16(%rax), %xmm0
vmovdqa 7*16(%rax), %xmm4
vmovdqa %xmm0, 6*16(%rsp)
vmovdqa %xmm4, 7*16(%rsp)
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 6*16(%rax)
vmovdqa %xmm7, 7*16(%rax)
/* slot 8 = sigma1(slot 6) + old slot 8; slot 9 = sigma1(slot 7) + new slot 2 */
vmovdqa 8*16(%rax), %xmm0
vmovdqa 2*16(%rax), %xmm4
vmovdqa %xmm0, 8*16(%rsp)
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 8*16(%rax)
vmovdqa %xmm7, 9*16(%rax)
/* slot 10 = sigma1(slot 8) + slot 3; slot 11 = sigma1(slot 9) + slot 4 */
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd 3*16(%rax), %xmm3, %xmm3
vpaddd 4*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 10*16(%rax)
vmovdqa %xmm7, 11*16(%rax)
/* slot 12 = sigma1(slot 10) + slot 5; slot 13 = sigma1(slot 11) + slot 6 */
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd 5*16(%rax), %xmm3, %xmm3
vpaddd 6*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 12*16(%rax)
vmovdqa %xmm7, 13*16(%rax)
/*
 * slot 14 = sigma1(slot 12) + old slot 14 + slot 7
 * slot 15 = sigma1(slot 13) + old slot 15 + slot 8
 */
vmovdqa 14*16(%rax), %xmm0
vmovdqa 15*16(%rax), %xmm4
vmovdqa %xmm0, 14*16(%rsp)
vmovdqa %xmm4, 15*16(%rsp)
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpaddd 7*16(%rax), %xmm0, %xmm0
vpaddd 8*16(%rax), %xmm4, %xmm4
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 14*16(%rax)
vmovdqa %xmm7, 15*16(%rax)
sha256d_ms_4way_xop_extend_loop2:
/*
 * Shared by both passes (fall-through on the first pass, jmp from the
 * second-pass set-up).  The macro is defined outside this chunk.
 */
sha256_xop_extend_doubleround 16
sha256_xop_extend_doubleround 18
sha256_xop_extend_doubleround 20
sha256_xop_extend_doubleround 22
sha256_xop_extend_doubleround 24
sha256_xop_extend_doubleround 26
sha256_xop_extend_doubleround 28
sha256_xop_extend_doubleround 30
sha256_xop_extend_doubleround 32
sha256_xop_extend_doubleround 34
sha256_xop_extend_doubleround 36
sha256_xop_extend_doubleround 38
sha256_xop_extend_doubleround 40
sha256_xop_extend_doubleround 42
/* ZF is set only on the second pass (see cmpq below). */
jz sha256d_ms_4way_xop_extend_coda2
sha256_xop_extend_doubleround 44
sha256_xop_extend_doubleround 46
/*
 * First pass: load the eight state vectors supplied through %rcx, point
 * %rax at the %rsi array and %rcx at the sha256_4k table, then enter the
 * main loop at round 3 (rounds 0-2 are skipped on this pass).
 */
movdqa 0(%rcx), %xmm7
movdqa 16(%rcx), %xmm8
movdqa 32(%rcx), %xmm9
movdqa 48(%rcx), %xmm10
movdqa 64(%rcx), %xmm0
movdqa 80(%rcx), %xmm5
movdqa 96(%rcx), %xmm4
movdqa 112(%rcx), %xmm3
movq %rsi, %rax
leaq sha256_4k(%rip), %rcx
jmp sha256d_ms_4way_xop_main_loop1
sha256d_ms_4way_xop_main_loop2:
/* Second-pass entry: rounds 0-2 are executed here. */
sha256_xop_main_round 0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7
sha256_xop_main_round 1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3
sha256_xop_main_round 2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4
sha256d_ms_4way_xop_main_loop1:
/* First-pass entry: rounds 3..56 are common to both passes. */
sha256_xop_main_round 3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5
sha256_xop_main_quadround 4
sha256_xop_main_quadround 8
sha256_xop_main_quadround 12
sha256_xop_main_quadround 16
sha256_xop_main_quadround 20
sha256_xop_main_quadround 24
sha256_xop_main_quadround 28
sha256_xop_main_quadround 32
sha256_xop_main_quadround 36
sha256_xop_main_quadround 40
sha256_xop_main_quadround 44
sha256_xop_main_quadround 48
sha256_xop_main_quadround 52
sha256_xop_main_round 56, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7
/* Second pass (ZF set): finish with the reduced rounds 57-60. */
jz sha256d_ms_4way_xop_finish
sha256_xop_main_round 57, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3
sha256_xop_main_round 58, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4
sha256_xop_main_round 59, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5
sha256_xop_main_quadround 60
/*
 * Write the values saved in extend_loop1 back to slots 18-20, 22-24,
 * 30 and 31 of the %rsi array.
 */
movdqa 2*16(%rsp), %xmm1
movdqa 3*16(%rsp), %xmm2
movdqa 4*16(%rsp), %xmm6
movdqa %xmm1, 18*16(%rsi)
movdqa %xmm2, 19*16(%rsi)
movdqa %xmm6, 20*16(%rsi)
movdqa 6*16(%rsp), %xmm1
movdqa 7*16(%rsp), %xmm2
movdqa 8*16(%rsp), %xmm6
movdqa %xmm1, 22*16(%rsi)
movdqa %xmm2, 23*16(%rsi)
movdqa %xmm6, 24*16(%rsi)
movdqa 14*16(%rsp), %xmm1
movdqa 15*16(%rsp), %xmm2
movdqa %xmm1, 30*16(%rsi)
movdqa %xmm2, 31*16(%rsi)
/*
 * Add the eight vectors at (%rdx) to the working state and store the
 * sums as slots 0..7 of the second-pass input block on the stack.
 */
paddd 0(%rdx), %xmm7
paddd 16(%rdx), %xmm5
paddd 32(%rdx), %xmm4
paddd 48(%rdx), %xmm3
paddd 64(%rdx), %xmm0
paddd 80(%rdx), %xmm8
paddd 96(%rdx), %xmm9
paddd 112(%rdx), %xmm10
movdqa %xmm7, 0(%rsp)
movdqa %xmm5, 16(%rsp)
movdqa %xmm4, 32(%rsp)
movdqa %xmm3, 48(%rsp)
movdqa %xmm0, 64(%rsp)
movdqa %xmm8, 80(%rsp)
movdqa %xmm9, 96(%rsp)
movdqa %xmm10, 112(%rsp)
/*
 * Fill slots 8..15: slot 8 = 0x80000000 in every lane, slots 9..14 = 0,
 * slot 15 = 0x00000100 in every lane (SHA-256 padding for a 256-bit
 * message).  pshufd 0x55 broadcasts the high dword of the immediate,
 * pshufd 0x00 the low dword.
 */
pxor %xmm0, %xmm0
movq $0x8000000000000100, %rax
movd %rax, %xmm1
pshufd $0x55, %xmm1, %xmm2
pshufd $0x00, %xmm1, %xmm1
movdqa %xmm2, 128(%rsp)
movdqa %xmm0, 144(%rsp)
movdqa %xmm0, 160(%rsp)
movdqa %xmm0, 176(%rsp)
movdqa %xmm0, 192(%rsp)
movdqa %xmm0, 208(%rsp)
movdqa %xmm0, 224(%rsp)
movdqa %xmm1, 240(%rsp)
/* %rax -> slot 16 of the stack block. */
leaq 256(%rsp), %rax
/* Always-equal compare: sets ZF to mark the second pass for both jz. */
cmpq %rax, %rax
/*
 * Second-pass partial extension using the sha256d_4preext2_* constants
 * (defined outside this chunk).
 * slot 0 = sigma0(slot -15) + slot -16
 * slot 1 = sigma0(slot -14) + slot -15 + sha256d_4preext2_17
 */
vmovdqa -15*16(%rax), %xmm0
vmovdqa -14*16(%rax), %xmm4
vprotd $25, %xmm0, %xmm1
vprotd $25, %xmm4, %xmm5
vprotd $14, %xmm0, %xmm2
vprotd $14, %xmm4, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $3, %xmm0, %xmm8
vpsrld $3, %xmm4, %xmm4
vpxor %xmm2, %xmm8, %xmm8
vpxor %xmm6, %xmm4, %xmm4
vpaddd %xmm0, %xmm4, %xmm4
vpaddd -16*16(%rax), %xmm8, %xmm3
vpaddd sha256d_4preext2_17(%rip), %xmm4, %xmm7
vmovdqa %xmm3, 0*16(%rax)
vmovdqa %xmm7, 1*16(%rax)
sha256_xop_extend_doubleround 2
sha256_xop_extend_doubleround 4
/*
 * xmm0 = sigma0(slot -9) + slot -10 + slot -1
 * xmm4 = slot -9 + sha256d_4preext2_23 + slot 0
 * slot 6 = sigma1(xmm3) + xmm0; slot 7 = sigma1(xmm7) + xmm4
 * (xmm3/xmm7 are whatever the preceding macro left there -- presumably
 * the two slots it just produced; verify against the macro definition.)
 */
vmovdqa -9*16(%rax), %xmm0
vprotd $25, %xmm0, %xmm1
vprotd $14, %xmm0, %xmm2
vpsrld $3, %xmm0, %xmm8
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm2, %xmm8, %xmm8
vpaddd sha256d_4preext2_23(%rip), %xmm0, %xmm4
vpaddd -10*16(%rax), %xmm8, %xmm0
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpaddd -1*16(%rax), %xmm0, %xmm0
vpaddd 0*16(%rax), %xmm4, %xmm4
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 6*16(%rax)
vmovdqa %xmm7, 7*16(%rax)
/*
 * slot 8 = sigma1(slot 6) + sha256d_4preext2_24 + slot 1
 * slot 9 = sigma1(slot 7) + slot 2
 */
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd sha256d_4preext2_24(%rip), %xmm3, %xmm3
vpaddd 1*16(%rax), %xmm3, %xmm3
vpaddd 2*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 8*16(%rax)
vmovdqa %xmm7, 9*16(%rax)
/* slot 10 = sigma1(slot 8) + slot 3; slot 11 = sigma1(slot 9) + slot 4 */
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd 3*16(%rax), %xmm3, %xmm3
vpaddd 4*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 10*16(%rax)
vmovdqa %xmm7, 11*16(%rax)
/* slot 12 = sigma1(slot 10) + slot 5; slot 13 = sigma1(slot 11) + slot 6 */
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd 5*16(%rax), %xmm3, %xmm3
vpaddd 6*16(%rax), %xmm7, %xmm7
vmovdqa %xmm3, 12*16(%rax)
vmovdqa %xmm7, 13*16(%rax)
/*
 * slot 14 = sigma1(slot 12) + sha256d_4preext2_30 + slot 7
 * slot 15 = sigma1(slot 13) + sigma0(slot 0) + slot -1 + slot 8
 */
vmovdqa sha256d_4preext2_30(%rip), %xmm0
vmovdqa 0*16(%rax), %xmm4
vprotd $25, %xmm4, %xmm5
vprotd $14, %xmm4, %xmm6
vpxor %xmm5, %xmm6, %xmm6
vpsrld $3, %xmm4, %xmm4
vpxor %xmm6, %xmm4, %xmm4
vpaddd -1*16(%rax), %xmm4, %xmm4
vprotd $15, %xmm3, %xmm1
vprotd $15, %xmm7, %xmm5
vprotd $13, %xmm3, %xmm2
vprotd $13, %xmm7, %xmm6
vpxor %xmm1, %xmm2, %xmm2
vpxor %xmm5, %xmm6, %xmm6
vpaddd 7*16(%rax), %xmm0, %xmm0
vpaddd 8*16(%rax), %xmm4, %xmm4
vpsrld $10, %xmm3, %xmm3
vpsrld $10, %xmm7, %xmm7
vpxor %xmm2, %xmm3, %xmm3
vpxor %xmm6, %xmm7, %xmm7
vpaddd %xmm0, %xmm3, %xmm3
vpaddd %xmm4, %xmm7, %xmm7
vmovdqa %xmm3, 14*16(%rax)
vmovdqa %xmm7, 15*16(%rax)
/* Re-enter the shared extension code with ZF still set. */
jmp sha256d_ms_4way_xop_extend_loop2
sha256d_ms_4way_xop_extend_coda2:
/*
 * Second pass only: one more single extend round, then load the state
 * from sha256_4h, point %rax at the stack block and %rcx at sha256_4k,
 * and run the main loop from round 0.
 */
sha256_xop_extend_round 44
movdqa sha256_4h+0(%rip), %xmm7
movdqa sha256_4h+16(%rip), %xmm5
movdqa sha256_4h+32(%rip), %xmm4
movdqa sha256_4h+48(%rip), %xmm3
movdqa sha256_4h+64(%rip), %xmm0
movdqa sha256_4h+80(%rip), %xmm8
movdqa sha256_4h+96(%rip), %xmm9
movdqa sha256_4h+112(%rip), %xmm10
movq %rsp, %rax
leaq sha256_4k(%rip), %rcx
jmp sha256d_ms_4way_xop_main_loop2
/*
 * sha256_xop_main_round_red i, r0, r1, r2, r3, r4
 *
 * Reduced main round: only r0 receives a new value.
 *   r0 = r4 + old r0 + [16*i(%rax)] + [16*i(%rcx)]
 *        + ((~r3 & r1) ^ (r3 & r2))
 *        + (rotl(r3,26) ^ rotl(r3,21) ^ rotl(r3,7))
 * The rotate term equals rotr 6 / 11 / 25, i.e. SHA-256 Sigma1.
 * Clobbers %xmm1, %xmm2, %xmm6; r1-r4 are left unchanged.
 */
.macro sha256_xop_main_round_red i, r0, r1, r2, r3, r4
vpaddd 16*\i(%rax), \r0, %xmm6
vpaddd 16*\i(%rcx), %xmm6, %xmm6
vpandn \r1, \r3, %xmm1
vpand \r3, \r2, %xmm2
vpxor %xmm2, %xmm1, %xmm1
vpaddd %xmm1, %xmm6, %xmm6
vprotd $26, \r3, %xmm1
vprotd $21, \r3, %xmm2
vpxor %xmm1, %xmm2, %xmm2
vprotd $7, \r3, \r0
vpxor %xmm2, \r0, \r0
vpaddd \r0, %xmm6, %xmm6
vpaddd %xmm6, \r4, \r0
.endm
sha256d_ms_4way_xop_finish:
/*
 * Second-pass tail: four reduced rounds, then add the vector at
 * sha256_4h+112 to %xmm10 and store that single vector at 112(%rdi).
 * No other output slot is written by this routine.
 */
sha256_xop_main_round_red 57, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4
sha256_xop_main_round_red 58, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5
sha256_xop_main_round_red 59, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7
sha256_xop_main_round_red 60, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3
paddd sha256_4h+112(%rip), %xmm10
movdqa %xmm10, 112(%rdi)
/* Release the scratch area and undo the Win64 prologue in reverse. */
addq $1032, %rsp
#if defined(_WIN64) || defined(__CYGWIN__)
popq %rsi
movdqa 0(%rsp), %xmm6
movdqa 16(%rsp), %xmm7
movdqa 32(%rsp), %xmm8
movdqa 48(%rsp), %xmm9
movdqa 64(%rsp), %xmm10
addq $80, %rsp
popq %rdi
#endif
ret
#endif /* USE_XOP */
/*
 * sha256_use_4way
 *
 * Run-time CPU dispatch for the SHA-256 code.  Takes no arguments.
 *
 * Returns in %eax:
 *   0  the VIA PadLock check passed; sha256_transform_addr has been set
 *      to sha256_transform_phe and the 4-way pointers are left untouched
 *   1  sha256d_ms_4way_addr and sha256_transform_4way_core_addr have
 *      been set to the XOP, AVX or SSE2 implementations (best available
 *      among those compiled in via USE_XOP / USE_AVX)
 *
 * %rbx, %rcx and %rdx are saved and restored because CPUID overwrites
 * them; %rax is the only register changed on return.
 */
.text
.p2align 6
.globl sha256_use_4way
.globl _sha256_use_4way
sha256_use_4way:
_sha256_use_4way:
pushq %rbx
pushq %rcx
pushq %rdx
/* Check for VIA PadLock Hash Engine */
/*
 * CPUID 0xc0000000 must report a maximum leaf of at least 0xc0000001,
 * and CPUID 0xc0000001 must have both EDX bits of mask 0x00000c00 set.
 */
movl $0xc0000000, %eax
cpuid
cmpl $0xc0000001, %eax
jb sha256_use_4way_no_phe
movl $0xc0000001, %eax
cpuid
andl $0x00000c00, %edx
cmpl $0x00000c00, %edx
jne sha256_use_4way_no_phe
/* PadLock path: install the PHE transform and return 0. */
leaq sha256_transform_phe(%rip), %rdx
movq %rdx, sha256_transform_addr(%rip)
xorl %eax, %eax
jmp sha256_use_4way_exit
sha256_use_4way_no_phe:
#if defined(USE_AVX)
/* Check for AVX and OSXSAVE support */
/* CPUID leaf 1: ECX bits 27 and 28 (mask 0x18000000) must both be set. */
movl $1, %eax
cpuid
andl $0x18000000, %ecx
cmpl $0x18000000, %ecx
jne sha256_use_4way_base
/* Check for XMM and YMM state support */
/* XGETBV with ECX = 0: EAX bits 1 and 2 (mask 0x6) must both be set. */
xorl %ecx, %ecx
xgetbv
andl $0x00000006, %eax
cmpl $0x00000006, %eax
jne sha256_use_4way_base
#if defined(USE_XOP)
/* Check for XOP support */
/* CPUID leaf 0x80000001: ECX bit 11 (mask 0x00000800). */
movl $0x80000001, %eax
cpuid
andl $0x00000800, %ecx
jz sha256_use_4way_avx
sha256_use_4way_xop:
leaq sha256d_ms_4way_xop(%rip), %rcx
leaq sha256_transform_4way_core_xop(%rip), %rdx
jmp sha256_use_4way_done
#endif /* USE_XOP */
sha256_use_4way_avx:
leaq sha256d_ms_4way_avx(%rip), %rcx
leaq sha256_transform_4way_core_avx(%rip), %rdx
jmp sha256_use_4way_done
#endif /* USE_AVX */
sha256_use_4way_base:
/* Fallback when no AVX path is selected or compiled in: SSE2. */
leaq sha256d_ms_4way_sse2(%rip), %rcx
leaq sha256_transform_4way_core_sse2(%rip), %rdx
sha256_use_4way_done:
/* Publish the chosen implementations and return 1. */
movq %rcx, sha256d_ms_4way_addr(%rip)
movq %rdx, sha256_transform_4way_core_addr(%rip)
movl $1, %eax
sha256_use_4way_exit:
popq %rdx
popq %rcx
popq %rbx
ret
#if defined(USE_AVX2)
.text
.p2align 6
.globl sha256d_ms_8way
.globl _sha256d_ms_8way
/*
 * sha256d_ms_8way (AVX2): eight-lane double SHA-256 kernel working on
 * 32-byte vectors (one 32-bit word per lane).
 *
 * Register arguments as used below (the Win64/Cygwin prologue moves
 * %rcx, %rdx, %r8, %r9 into these registers):
 *   %rdi  output; only the vector at byte offset 224 is written, by the
 *         sha256d_ms_8way_avx2_finish tail
 *   %rsi  message-schedule buffer; slots 16 and up are extended in place.
 *         The original contents of slots 18-20, 22-24, 30 and 31 are parked
 *         on the stack and written back after the first compression.
 *   %rdx  eight vectors added to the working state after the first
 *         compression
 *   %rcx  eight vectors loaded as the working state of the first
 *         compression
 * NOTE(review): %rdx and %rcx look like a midstate and a partially advanced
 * state - confirm against the C prototype.
 * The buffers accessed with vmovdqa (%rdi, %rsi, %rcx) must be 32-byte
 * aligned.
 *
 * The code makes two passes over the same extension and round code. The
 * pass is tracked in ZF: it is clear during the first pass and set by
 * "cmpq %rax, %rax" before the second, and the jz instructions branch on it.
 */
sha256d_ms_8way:
_sha256d_ms_8way:
sha256d_ms_8way_avx2:
#if defined(_WIN64) || defined(__CYGWIN__)
/* Save %rdi, %rsi and %xmm6-%xmm10, then remap the register arguments */
pushq %rdi
subq $80, %rsp
vmovdqa %xmm6, 0(%rsp)
vmovdqa %xmm7, 16(%rsp)
vmovdqa %xmm8, 32(%rsp)
vmovdqa %xmm9, 48(%rsp)
vmovdqa %xmm10, 64(%rsp)
pushq %rsi
movq %rcx, %rdi
movq %rdx, %rsi
movq %r8, %rdx
movq %r9, %rcx
#endif
/* Reserve a 128-byte aligned scratch area of 64 vectors; %rbp keeps the old %rsp */
pushq %rbp
movq %rsp, %rbp
subq $64*32, %rsp
/*
 * The and leaves ZF clear (non-zero stack pointer). Nothing written out in
 * this function before the cmpq below modifies flags again; the extend and
 * round macros are defined outside this excerpt and are assumed not to
 * either.
 */
andq $-128, %rsp
/* %rax = slot 16 of the schedule in %rsi */
leaq 16*32(%rsi), %rax
sha256d_ms_8way_avx2_extend_loop1:
/*
 * First pass, slots 18 and 19: save the stored values on the stack, then
 * add s0(slot 3) to slot 18 and slot 3 to slot 19.
 */
vmovdqa 3*32(%rsi), %ymm0
vmovdqa 2*32(%rax), %ymm3
vmovdqa 3*32(%rax), %ymm7
vmovdqa %ymm3, 2*32(%rsp)
vmovdqa %ymm7, 3*32(%rsp)
vpaddd %ymm0, %ymm7, %ymm7
vpslld $14, %ymm0, %ymm2
vpsrld $3, %ymm0, %ymm0
vpsrld $4, %ymm0, %ymm1
vpxor %ymm1, %ymm0, %ymm0
vpxor %ymm2, %ymm0, %ymm0
vpsrld $11, %ymm1, %ymm1
vpslld $11, %ymm2, %ymm2
vpxor %ymm1, %ymm0, %ymm0
vpxor %ymm2, %ymm0, %ymm0
vpaddd %ymm0, %ymm3, %ymm3
vmovdqa %ymm3, 2*32(%rax)
vmovdqa %ymm7, 3*32(%rax)
/*
 * Slots 20 and 21: s1 of the previous pair (in %ymm3/%ymm7); the stored
 * slot 20 is saved and added, slot 21 receives s1 alone.
 */
vmovdqa 4*32(%rax), %ymm0
vmovdqa %ymm0, 4*32(%rsp)
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd %ymm0, %ymm3, %ymm3
vmovdqa %ymm3, 4*32(%rax)
vmovdqa %ymm7, 5*32(%rax)
/* Slots 22 and 23: stored values saved, then s1 of the previous pair added */
vmovdqa 6*32(%rax), %ymm0
vmovdqa 7*32(%rax), %ymm4
vmovdqa %ymm0, 6*32(%rsp)
vmovdqa %ymm4, 7*32(%rsp)
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd %ymm0, %ymm3, %ymm3
vpaddd %ymm4, %ymm7, %ymm7
vmovdqa %ymm3, 6*32(%rax)
vmovdqa %ymm7, 7*32(%rax)
/*
 * Slots 24 and 25: slot 24 adds its saved stored value, slot 25 adds the
 * already completed slot 18.
 */
vmovdqa 8*32(%rax), %ymm0
vmovdqa 2*32(%rax), %ymm4
vmovdqa %ymm0, 8*32(%rsp)
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd %ymm0, %ymm3, %ymm3
vpaddd %ymm4, %ymm7, %ymm7
vmovdqa %ymm3, 8*32(%rax)
vmovdqa %ymm7, 9*32(%rax)
/* Slots 26 and 27: s1 of the previous pair plus completed slots 19 and 20 */
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd 3*32(%rax), %ymm3, %ymm3
vpaddd 4*32(%rax), %ymm7, %ymm7
vmovdqa %ymm3, 10*32(%rax)
vmovdqa %ymm7, 11*32(%rax)
/* Slots 28 and 29: s1 of the previous pair plus completed slots 21 and 22 */
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd 5*32(%rax), %ymm3, %ymm3
vpaddd 6*32(%rax), %ymm7, %ymm7
vmovdqa %ymm3, 12*32(%rax)
vmovdqa %ymm7, 13*32(%rax)
/*
 * Slots 30 and 31: stored values saved, then combined with completed slots
 * 23 and 24 and s1 of the previous pair.
 */
vmovdqa 14*32(%rax), %ymm0
vmovdqa 15*32(%rax), %ymm4
vmovdqa %ymm0, 14*32(%rsp)
vmovdqa %ymm4, 15*32(%rsp)
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpaddd 7*32(%rax), %ymm0, %ymm0
vpaddd 8*32(%rax), %ymm4, %ymm4
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd %ymm0, %ymm3, %ymm3
vpaddd %ymm4, %ymm7, %ymm7
vmovdqa %ymm3, 14*32(%rax)
vmovdqa %ymm7, 15*32(%rax)
/*
 * Generic extension shared by both passes; the macro argument is a slot
 * index relative to %rax. The second pass (ZF set) leaves after the pair
 * at 42/43 and finishes in the coda.
 */
sha256d_ms_8way_avx2_extend_loop2:
sha256_avx2_extend_doubleround 16
sha256_avx2_extend_doubleround 18
sha256_avx2_extend_doubleround 20
sha256_avx2_extend_doubleround 22
sha256_avx2_extend_doubleround 24
sha256_avx2_extend_doubleround 26
sha256_avx2_extend_doubleround 28
sha256_avx2_extend_doubleround 30
sha256_avx2_extend_doubleround 32
sha256_avx2_extend_doubleround 34
sha256_avx2_extend_doubleround 36
sha256_avx2_extend_doubleround 38
sha256_avx2_extend_doubleround 40
sha256_avx2_extend_doubleround 42
jz sha256d_ms_8way_avx2_extend_coda2
sha256_avx2_extend_doubleround 44
sha256_avx2_extend_doubleround 46
/*
 * First pass only: load the working state from %rcx, then point %rax at
 * the schedule in %rsi and %rcx at sha256_8k (defined outside this excerpt;
 * by its name the 8-lane round constants).
 */
vmovdqa 0(%rcx), %ymm7
vmovdqa 32(%rcx), %ymm8
vmovdqa 64(%rcx), %ymm9
vmovdqa 96(%rcx), %ymm10
vmovdqa 128(%rcx), %ymm0
vmovdqa 160(%rcx), %ymm5
vmovdqa 192(%rcx), %ymm4
vmovdqa 224(%rcx), %ymm3
movq %rsi, %rax
leaq sha256_8k(%rip), %rcx
/*
 * The first pass enters at round 3.
 * NOTE(review): presumably the state supplied in %rcx already covers
 * rounds 0-2 - confirm with the caller.
 */
jmp sha256d_ms_8way_avx2_main_loop1
/* Second pass entry: runs rounds 0-2 as well */
sha256d_ms_8way_avx2_main_loop2:
sha256_avx2_main_round 0, %ymm10, %ymm9, %ymm8, %ymm0, %ymm3, %ymm4, %ymm5, %ymm7
sha256_avx2_main_round 1, %ymm9, %ymm8, %ymm0, %ymm10, %ymm4, %ymm5, %ymm7, %ymm3
sha256_avx2_main_round 2, %ymm8, %ymm0, %ymm10, %ymm9, %ymm5, %ymm7, %ymm3, %ymm4
sha256d_ms_8way_avx2_main_loop1:
sha256_avx2_main_round 3, %ymm0, %ymm10, %ymm9, %ymm8, %ymm7, %ymm3, %ymm4, %ymm5
sha256_avx2_main_quadround 4
sha256_avx2_main_quadround 8
sha256_avx2_main_quadround 12
sha256_avx2_main_quadround 16
sha256_avx2_main_quadround 20
sha256_avx2_main_quadround 24
sha256_avx2_main_quadround 28
sha256_avx2_main_quadround 32
sha256_avx2_main_quadround 36
sha256_avx2_main_quadround 40
sha256_avx2_main_quadround 44
sha256_avx2_main_quadround 48
sha256_avx2_main_quadround 52
sha256_avx2_main_round 56, %ymm10, %ymm9, %ymm8, %ymm0, %ymm3, %ymm4, %ymm5, %ymm7
/* Second pass (ZF set): the remaining rounds are done by the reduced tail */
jz sha256d_ms_8way_avx2_finish
sha256_avx2_main_round 57, %ymm9, %ymm8, %ymm0, %ymm10, %ymm4, %ymm5, %ymm7, %ymm3
sha256_avx2_main_round 58, %ymm8, %ymm0, %ymm10, %ymm9, %ymm5, %ymm7, %ymm3, %ymm4
sha256_avx2_main_round 59, %ymm0, %ymm10, %ymm9, %ymm8, %ymm7, %ymm3, %ymm4, %ymm5
sha256_avx2_main_quadround 60
/* Write the values parked on the stack back to slots 18-20, 22-24, 30, 31 of %rsi */
vmovdqa 2*32(%rsp), %ymm1
vmovdqa 3*32(%rsp), %ymm2
vmovdqa 4*32(%rsp), %ymm6
vmovdqa %ymm1, 18*32(%rsi)
vmovdqa %ymm2, 19*32(%rsi)
vmovdqa %ymm6, 20*32(%rsi)
vmovdqa 6*32(%rsp), %ymm1
vmovdqa 7*32(%rsp), %ymm2
vmovdqa 8*32(%rsp), %ymm6
vmovdqa %ymm1, 22*32(%rsi)
vmovdqa %ymm2, 23*32(%rsi)
vmovdqa %ymm6, 24*32(%rsi)
vmovdqa 14*32(%rsp), %ymm1
vmovdqa 15*32(%rsp), %ymm2
vmovdqa %ymm1, 30*32(%rsi)
vmovdqa %ymm2, 31*32(%rsi)
/* Add the eight vectors at %rdx to the working state: result of the first hash */
vpaddd 0(%rdx), %ymm7, %ymm7
vpaddd 32(%rdx), %ymm5, %ymm5
vpaddd 64(%rdx), %ymm4, %ymm4
vpaddd 96(%rdx), %ymm3, %ymm3
vpaddd 128(%rdx), %ymm0, %ymm0
vpaddd 160(%rdx), %ymm8, %ymm8
vpaddd 192(%rdx), %ymm9, %ymm9
vpaddd 224(%rdx), %ymm10, %ymm10
/* That result becomes slots 0-7 of the second message block on the stack */
vmovdqa %ymm7, 0(%rsp)
vmovdqa %ymm5, 32(%rsp)
vmovdqa %ymm4, 64(%rsp)
vmovdqa %ymm3, 96(%rsp)
vmovdqa %ymm0, 128(%rsp)
vmovdqa %ymm8, 160(%rsp)
vmovdqa %ymm9, 192(%rsp)
vmovdqa %ymm10, 224(%rsp)
/*
 * Padding for a 32-byte message: slot 8 = 0x80000000 in every lane,
 * slots 9-14 = 0, slot 15 = 0x00000100 (length in bits) in every lane.
 * Both constants come from the two dwords of the immediate in %rax.
 */
vpxor %ymm0, %ymm0, %ymm0
movq $0x8000000000000100, %rax
vmovd %rax, %xmm1
vinserti128 $1, %xmm1, %ymm1, %ymm1
vpshufd $0x55, %ymm1, %ymm2
vpshufd $0x00, %ymm1, %ymm1
vmovdqa %ymm2, 8*32(%rsp)
vmovdqa %ymm0, 9*32(%rsp)
vmovdqa %ymm0, 10*32(%rsp)
vmovdqa %ymm0, 11*32(%rsp)
vmovdqa %ymm0, 12*32(%rsp)
vmovdqa %ymm0, 13*32(%rsp)
vmovdqa %ymm0, 14*32(%rsp)
vmovdqa %ymm1, 15*32(%rsp)
/* %rax = slot 16 of the stack schedule */
leaq 16*32(%rsp), %rax
/* Comparing a register with itself sets ZF: marks the second pass for the jz above */
cmpq %rax, %rax
/*
 * Second pass, slots 16 and 17: s0 of slots 1 and 2 combined with slot 0,
 * slot 1 and the constant sha256d_8preext2_17 (defined outside this
 * excerpt).
 */
vmovdqa -15*32(%rax), %ymm0
vmovdqa -14*32(%rax), %ymm4
vpslld $14, %ymm0, %ymm2
vpslld $14, %ymm4, %ymm6
vpsrld $3, %ymm0, %ymm8
vpsrld $3, %ymm4, %ymm4
vpsrld $7, %ymm0, %ymm1
vpsrld $4, %ymm4, %ymm5
vpxor %ymm1, %ymm8, %ymm8
vpxor %ymm5, %ymm4, %ymm4
vpsrld $11, %ymm1, %ymm1
vpsrld $11, %ymm5, %ymm5
vpxor %ymm2, %ymm8, %ymm8
vpxor %ymm6, %ymm4, %ymm4
vpslld $11, %ymm2, %ymm2
vpslld $11, %ymm6, %ymm6
vpxor %ymm1, %ymm8, %ymm8
vpxor %ymm5, %ymm4, %ymm4
vpxor %ymm2, %ymm8, %ymm8
vpxor %ymm6, %ymm4, %ymm4
vpaddd %ymm0, %ymm4, %ymm4
vpaddd -16*32(%rax), %ymm8, %ymm3
vpaddd sha256d_8preext2_17(%rip), %ymm4, %ymm7
vmovdqa %ymm3, 0*32(%rax)
vmovdqa %ymm7, 1*32(%rax)
/* Slots 18-21 through the generic macro */
sha256_avx2_extend_doubleround 2
sha256_avx2_extend_doubleround 4
/*
 * Slots 22 and 23: s0(slot 7) + slot 6 + slot 15, and slot 7 + slot 16 +
 * sha256d_8preext2_23, each plus s1 of the previous pair.
 */
vmovdqa -9*32(%rax), %ymm0
vpslld $14, %ymm0, %ymm2
vpsrld $3, %ymm0, %ymm8
vpsrld $7, %ymm0, %ymm1
vpxor %ymm1, %ymm8, %ymm8
vpxor %ymm2, %ymm8, %ymm8
vpsrld $11, %ymm1, %ymm1
vpslld $11, %ymm2, %ymm2
vpxor %ymm1, %ymm8, %ymm8
vpxor %ymm2, %ymm8, %ymm8
vpaddd sha256d_8preext2_23(%rip), %ymm0, %ymm4
vpaddd -10*32(%rax), %ymm8, %ymm0
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpaddd -1*32(%rax), %ymm0, %ymm0
vpaddd 0*32(%rax), %ymm4, %ymm4
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd %ymm0, %ymm3, %ymm3
vpaddd %ymm4, %ymm7, %ymm7
vmovdqa %ymm3, 6*32(%rax)
vmovdqa %ymm7, 7*32(%rax)
/*
 * Slots 24 and 25: s1 of the previous pair plus slot 17 and slot 18;
 * slot 24 also adds sha256d_8preext2_24.
 */
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd sha256d_8preext2_24(%rip), %ymm3, %ymm3
vpaddd 1*32(%rax), %ymm3, %ymm3
vpaddd 2*32(%rax), %ymm7, %ymm7
vmovdqa %ymm3, 8*32(%rax)
vmovdqa %ymm7, 9*32(%rax)
/* Slots 26 and 27: s1 of the previous pair plus slots 19 and 20 */
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd 3*32(%rax), %ymm3, %ymm3
vpaddd 4*32(%rax), %ymm7, %ymm7
vmovdqa %ymm3, 10*32(%rax)
vmovdqa %ymm7, 11*32(%rax)
/* Slots 28 and 29: s1 of the previous pair plus slots 21 and 22 */
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd 5*32(%rax), %ymm3, %ymm3
vpaddd 6*32(%rax), %ymm7, %ymm7
vmovdqa %ymm3, 12*32(%rax)
vmovdqa %ymm7, 13*32(%rax)
/*
 * Slots 30 and 31: sha256d_8preext2_30 + slot 23, and s0(slot 16) +
 * slot 15 + slot 24, each plus s1 of the previous pair.
 */
vmovdqa sha256d_8preext2_30(%rip), %ymm0
vmovdqa 0*32(%rax), %ymm4
vpslld $14, %ymm4, %ymm6
vpsrld $3, %ymm4, %ymm4
vpsrld $4, %ymm4, %ymm5
vpxor %ymm5, %ymm4, %ymm4
vpxor %ymm6, %ymm4, %ymm4
vpsrld $11, %ymm5, %ymm5
vpslld $11, %ymm6, %ymm6
vpxor %ymm5, %ymm4, %ymm4
vpxor %ymm6, %ymm4, %ymm4
vpaddd -1*32(%rax), %ymm4, %ymm4
vpslld $13, %ymm3, %ymm2
vpslld $13, %ymm7, %ymm6
vpsrld $10, %ymm3, %ymm3
vpsrld $10, %ymm7, %ymm7
vpaddd 7*32(%rax), %ymm0, %ymm0
vpaddd 8*32(%rax), %ymm4, %ymm4
vpsrld $7, %ymm3, %ymm1
vpsrld $7, %ymm7, %ymm5
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpsrld $2, %ymm1, %ymm1
vpsrld $2, %ymm5, %ymm5
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpslld $2, %ymm2, %ymm2
vpslld $2, %ymm6, %ymm6
vpxor %ymm1, %ymm3, %ymm3
vpxor %ymm5, %ymm7, %ymm7
vpxor %ymm2, %ymm3, %ymm3
vpxor %ymm6, %ymm7, %ymm7
vpaddd %ymm0, %ymm3, %ymm3
vpaddd %ymm4, %ymm7, %ymm7
vmovdqa %ymm3, 14*32(%rax)
vmovdqa %ymm7, 15*32(%rax)
/* Continue with the shared extension code; ZF is still set */
jmp sha256d_ms_8way_avx2_extend_loop2
/*
 * Second pass coda: one more schedule slot (index 44 relative to %rax),
 * then load the state from sha256_8h, point %rax at the stack schedule and
 * %rcx at sha256_8k, and run the rounds from round 0.
 */
sha256d_ms_8way_avx2_extend_coda2:
sha256_avx2_extend_round 44
vmovdqa sha256_8h+0(%rip), %ymm7
vmovdqa sha256_8h+32(%rip), %ymm5
vmovdqa sha256_8h+64(%rip), %ymm4
vmovdqa sha256_8h+96(%rip), %ymm3
vmovdqa sha256_8h+128(%rip), %ymm0
vmovdqa sha256_8h+160(%rip), %ymm8
vmovdqa sha256_8h+192(%rip), %ymm9
vmovdqa sha256_8h+224(%rip), %ymm10
movq %rsp, %rax
leaq sha256_8k(%rip), %rcx
jmp sha256d_ms_8way_avx2_main_loop2
/*
 * sha256_avx2_main_round_red i, r0, r1, r2, r3, r4
 *
 * Reduced SHA-256 round that produces a single value:
 *   r0 = r4 + r0 + [%rax + 32*i] + [%rcx + 32*i]
 *           + ((r3 & r2) ^ (~r3 & r1))
 *           + (ror(r3, 6) ^ ror(r3, 11) ^ ror(r3, 25))
 * The rotations are assembled from shift pairs. The user in this file
 * points %rax at the message schedule and %rcx at sha256_8k.
 * Clobbers %ymm1, %ymm2 and %ymm6; r1-r4 are left unchanged.
 */
.macro sha256_avx2_main_round_red i, r0, r1, r2, r3, r4
/* ymm6 = r0 + schedule word + table word */
vpaddd 32*\i(%rax), \r0, %ymm6
vpaddd 32*\i(%rcx), %ymm6, %ymm6
/* ymm1 = (~r3 & r1) ^ (r3 & r2) */
vpandn \r1, \r3, %ymm1
vpand \r3, \r2, %ymm2
vpxor %ymm2, %ymm1, %ymm1
vpaddd %ymm1, %ymm6, %ymm6
/* r0 = XOR of r3 >> 6, >> 11, >> 25 and r3 << 7, << 21, << 26 */
vpslld $7, \r3, %ymm1
vpsrld $6, \r3, \r0
vpsrld $5, \r0, %ymm2
vpxor %ymm1, \r0, \r0
vpxor %ymm2, \r0, \r0
vpslld $14, %ymm1, %ymm1
vpsrld $14, %ymm2, %ymm2
vpxor %ymm1, \r0, \r0
vpxor %ymm2, \r0, \r0
vpslld $5, %ymm1, %ymm1
vpxor %ymm1, \r0, \r0
vpaddd \r0, %ymm6, %ymm6
/* Final sum with r4 goes to r0 */
vpaddd %ymm6, \r4, \r0
.endm
/*
 * Tail of sha256d_ms_8way, reached from the main loop on the second pass.
 *
 * Only the last state word of the second hash is produced: rounds 57-60
 * use the reduced round macro, and the value left in %ymm10 after round 60
 * is what rounds 61-63 would merely shift into the last state word. It is
 * added to word 7 of sha256_8h and stored at byte offset 224 of the output
 * buffer in %rdi. The frame set up in the prologue is then released.
 */
sha256d_ms_8way_avx2_finish:
sha256_avx2_main_round_red 57, %ymm9, %ymm8, %ymm0, %ymm10, %ymm4
sha256_avx2_main_round_red 58, %ymm8, %ymm0, %ymm10, %ymm9, %ymm5
sha256_avx2_main_round_red 59, %ymm0, %ymm10, %ymm9, %ymm8, %ymm7
sha256_avx2_main_round_red 60, %ymm10, %ymm9, %ymm8, %ymm0, %ymm3
vpaddd sha256_8h+224(%rip), %ymm10, %ymm10
vmovdqa %ymm10, 224(%rdi)
/*
 * The result is in memory, so clear the upper halves of all YMM registers
 * before returning: callers built without VEX encoding would otherwise run
 * their SSE code with dirty upper state and pay the AVX/SSE transition
 * penalty. vzeroupper does not modify flags or the low 128 bits.
 */
vzeroupper
/* Drop the aligned scratch area */
movq %rbp, %rsp
popq %rbp
#if defined(_WIN64) || defined(__CYGWIN__)
/* Restore the callee-saved registers stored by the prologue */
popq %rsi
vmovdqa 0(%rsp), %xmm6
vmovdqa 16(%rsp), %xmm7
vmovdqa 32(%rsp), %xmm8
vmovdqa 48(%rsp), %xmm9
vmovdqa 64(%rsp), %xmm10
addq $80, %rsp
popq %rdi
#endif
ret
.text
.p2align 6
.globl sha256_use_8way
.globl _sha256_use_8way
/*
 * sha256_use_8way: report whether the AVX2 8-way code can be used.
 *
 * Reads no arguments. Returns 1 in %eax when the CPU reports AVX, OSXSAVE
 * and AVX2 and XCR0 shows that XMM and YMM state are enabled; returns 0
 * otherwise. %rbx is preserved; %rcx and %rdx are overwritten by
 * cpuid/xgetbv.
 */
sha256_use_8way:
_sha256_use_8way:
pushq %rbx
/*
 * Leaf 7 is only meaningful when leaf 0 reports a maximum basic leaf of at
 * least 7. For an out-of-range leaf a processor may return the data of a
 * different leaf, whose EBX bit 5 is not the AVX2 flag.
 */
xorl %eax, %eax
cpuid
cmpl $7, %eax
jb sha256_use_8way_no
/* Check for AVX and OSXSAVE support (leaf 1, ECX mask 0x18000000) */
movl $1, %eax
cpuid
andl $0x18000000, %ecx
cmpl $0x18000000, %ecx
jne sha256_use_8way_no
/* Check for AVX2 support (leaf 7, sub-leaf 0, EBX bit 5) */
movl $7, %eax
xorl %ecx, %ecx
cpuid
andl $0x00000020, %ebx
cmpl $0x00000020, %ebx
jne sha256_use_8way_no
/* Check for XMM and YMM state support (xgetbv with ECX = 0, EAX mask 6) */
xorl %ecx, %ecx
xgetbv
andl $0x00000006, %eax
cmpl $0x00000006, %eax
jne sha256_use_8way_no
sha256_use_8way_yes:
movl $1, %eax
jmp sha256_use_8way_done
sha256_use_8way_no:
xorl %eax, %eax
sha256_use_8way_done:
popq %rbx
ret
#endif /* USE_AVX2 */
#endif
0707010000002B000081A4000003E800000064000000015EF4BCA100006795000000000000000000000000000000000000001A00000000cpuminer-2.5.1/sha2-x86.S/*
* Copyright 2012 pooler@litecoinpool.org
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#if defined(USE_ASM) && defined(__i386__)
.data
.p2align 7
/*
 * sha256_4h: the eight SHA-256 initial hash words, one row per word, each
 * replicated into four 32-bit lanes (8 rows of 16 bytes, 128-byte aligned).
 */
sha256_4h:
.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
.long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
.long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
.long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
.long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
.long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
.data
.p2align 7
/*
 * sha256_4k: the 64 SHA-256 round constants, one row per round, each
 * replicated into four 32-bit lanes. Row i starts at byte offset 16*i,
 * which is how the round code in this file indexes the table.
 */
sha256_4k:
.long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
.long 0x71374491, 0x71374491, 0x71374491, 0x71374491
.long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf
.long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5
.long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b
.long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1
.long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4
.long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5
.long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98
.long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01
.long 0x243185be, 0x243185be, 0x243185be, 0x243185be
.long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3
.long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74
.long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe
.long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7
.long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174
.long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1
.long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786
.long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6
.long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc
.long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f
.long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa
.long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc
.long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da
.long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152
.long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d
.long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8
.long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7
.long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3
.long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147
.long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351
.long 0x14292967, 0x14292967, 0x14292967, 0x14292967
.long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85
.long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138
.long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc
.long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13
.long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354
.long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb
.long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e
.long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85
.long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1
.long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b
.long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70
.long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3
.long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819
.long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624
.long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585
.long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070
.long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116
.long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08
.long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c
.long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5
.long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3
.long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a
.long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f
.long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3
.long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee
.long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f
.long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814
.long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208
.long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa
.long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb
.long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
.long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
.data
.p2align 6
/*
 * Four-lane broadcast constants: each label holds one 32-bit value
 * replicated four times.
 * NOTE(review): presumably the precomputed contributions of the fixed
 * padding words to the second-hash message schedule at the slot numbers in
 * the label names; the code that consumes them lies outside this excerpt -
 * confirm there.
 */
sha256d_4preext2_15:
.long 0x00000100, 0x00000100, 0x00000100, 0x00000100
sha256d_4preext2_17:
.long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000
sha256d_4preext2_23:
.long 0x11002000, 0x11002000, 0x11002000, 0x11002000
sha256d_4preext2_24:
.long 0x80000000, 0x80000000, 0x80000000, 0x80000000
sha256d_4preext2_30:
.long 0x00400022, 0x00400022, 0x00400022, 0x00400022
.text
.p2align 5
.globl sha256_init_4way
.globl _sha256_init_4way
/*
 * sha256_init_4way: copy the 128-byte sha256_4h table to the buffer whose
 * address is the first stack argument. The destination is written with
 * unaligned stores, so it needs no particular alignment.
 * Uses %edx and %xmm0-%xmm3.
 */
sha256_init_4way:
_sha256_init_4way:
movl 4(%esp), %edx
/* Rows 0-3 of the table, one load/store pair per row */
movdqa sha256_4h+0*16, %xmm0
movdqu %xmm0, 0*16(%edx)
movdqa sha256_4h+1*16, %xmm1
movdqu %xmm1, 1*16(%edx)
movdqa sha256_4h+2*16, %xmm2
movdqu %xmm2, 2*16(%edx)
movdqa sha256_4h+3*16, %xmm3
movdqu %xmm3, 3*16(%edx)
/* Rows 4-7 */
movdqa sha256_4h+4*16, %xmm0
movdqu %xmm0, 4*16(%edx)
movdqa sha256_4h+5*16, %xmm1
movdqu %xmm1, 5*16(%edx)
movdqa sha256_4h+6*16, %xmm2
movdqu %xmm2, 6*16(%edx)
movdqa sha256_4h+7*16, %xmm3
movdqu %xmm3, 7*16(%edx)
ret
/*
 * sha256_sse2_extend_round i
 *
 * Computes one message-schedule vector for slot i of the array at %eax
 * (16 bytes per slot):
 *   slot[i] = s1(%xmm3) + slot[i-7] + s0(slot[i-15]) + slot[i-16]
 * with s0(x) = ror(x,7) ^ ror(x,18) ^ (x >> 3) and
 *      s1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10), built from shift pairs.
 * On entry %xmm3 must hold the value to feed into s1 (it is not loaded
 * here); on exit %xmm3 holds the new slot[i].
 * Clobbers %xmm0, %xmm1 and %xmm2.
 */
.macro sha256_sse2_extend_round i
/* xmm0 = s0(slot[i-15]) */
movdqa (\i-15)*16(%eax), %xmm0
movdqa %xmm0, %xmm2
psrld $3, %xmm0
movdqa %xmm0, %xmm1
pslld $14, %xmm2
psrld $4, %xmm1
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
psrld $11, %xmm1
pslld $11, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
paddd (\i-16)*16(%eax), %xmm0
paddd (\i-7)*16(%eax), %xmm0
/* xmm3 = s1(xmm3) */
movdqa %xmm3, %xmm2
psrld $10, %xmm3
pslld $13, %xmm2
movdqa %xmm3, %xmm1
psrld $7, %xmm1
pxor %xmm1, %xmm3
pxor %xmm2, %xmm3
psrld $2, %xmm1
pslld $2, %xmm2
pxor %xmm1, %xmm3
pxor %xmm2, %xmm3
paddd %xmm0, %xmm3
movdqa %xmm3, \i*16(%eax)
.endm
/*
 * sha256_sse2_extend_doubleround i
 *
 * Two interleaved message-schedule steps for slots i and i+1 of the array
 * at %eax (16 bytes per slot):
 *   slot[i]   = s1(%xmm3) + slot[i-7] + s0(slot[i-15]) + slot[i-16]
 *   slot[i+1] = s1(%xmm7) + slot[i-6] + s0(slot[i-14]) + slot[i-15]
 * On entry %xmm3 and %xmm7 must hold the values to feed into s1 (they are
 * not loaded here); on exit they hold the new slot[i] and slot[i+1], which
 * lets consecutive invocations chain without reloading.
 * Clobbers %xmm0-%xmm2 and %xmm4-%xmm6.
 */
.macro sha256_sse2_extend_doubleround i
/* xmm0 / xmm4 = s0 of slot[i-15] / slot[i-14] */
movdqa (\i-15)*16(%eax), %xmm0
movdqa (\i-14)*16(%eax), %xmm4
movdqa %xmm0, %xmm2
movdqa %xmm4, %xmm6
psrld $3, %xmm0
psrld $3, %xmm4
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm5
pslld $14, %xmm2
pslld $14, %xmm6
psrld $4, %xmm1
psrld $4, %xmm5
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
psrld $11, %xmm1
psrld $11, %xmm5
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
pslld $11, %xmm2
pslld $11, %xmm6
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
paddd (\i-16)*16(%eax), %xmm0
paddd (\i-15)*16(%eax), %xmm4
/* xmm3 / xmm7 = s1 of their previous contents; the i-7 / i-6 terms are folded in on the way */
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
paddd (\i-7)*16(%eax), %xmm0
paddd (\i-6)*16(%eax), %xmm4
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, \i*16(%eax)
movdqa %xmm7, (\i+1)*16(%eax)
.endm
/*
 * sha256_sse2_main_round i
 *
 * One full SHA-256 round on four lanes. The schedule word is read from
 * slot i of the array at %eax and the round constant from row i of
 * sha256_4k.
 *
 * Working state on entry and exit (a..h in the usual SHA-256 naming):
 *   a = %xmm7, b = %xmm5, c = %xmm4, d = %xmm3, e = %xmm0,
 *   f = 0(%esp), g = 16(%esp), h = 32(%esp)
 * Clobbers %xmm1, %xmm2 and %xmm6.
 */
.macro sha256_sse2_main_round i
movdqa 16*(\i)(%eax), %xmm6
/* xmm1 = Ch(e, f, g); the three stack slots are rotated: h <- g, g <- f, f <- e */
movdqa %xmm0, %xmm1
movdqa 16(%esp), %xmm2
pandn %xmm2, %xmm1
paddd 32(%esp), %xmm6
movdqa %xmm2, 32(%esp)
movdqa 0(%esp), %xmm2
movdqa %xmm2, 16(%esp)
pand %xmm0, %xmm2
pxor %xmm2, %xmm1
movdqa %xmm0, 0(%esp)
paddd %xmm1, %xmm6
/* xmm0 = ror(e,6) ^ ror(e,11) ^ ror(e,25), built from shift pairs */
movdqa %xmm0, %xmm1
psrld $6, %xmm0
paddd 16*(\i)+sha256_4k, %xmm6
movdqa %xmm0, %xmm2
pslld $7, %xmm1
psrld $5, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $14, %xmm1
psrld $14, %xmm2
pxor %xmm1, %xmm0
pslld $5, %xmm1
pxor %xmm2, %xmm0
pxor %xmm1, %xmm0
movdqa %xmm5, %xmm1
paddd %xmm0, %xmm6
/* new e = d + t1 (t1 is in xmm6); xmm1 = Maj(a, b, c); d <- c, c <- b, b <- a */
movdqa %xmm3, %xmm0
movdqa %xmm4, %xmm3
movdqa %xmm4, %xmm2
paddd %xmm6, %xmm0
pand %xmm5, %xmm2
pand %xmm7, %xmm1
pand %xmm7, %xmm4
pxor %xmm4, %xmm1
movdqa %xmm5, %xmm4
movdqa %xmm7, %xmm5
pxor %xmm2, %xmm1
paddd %xmm1, %xmm6
/* new a = (ror(a,2) ^ ror(a,13) ^ ror(a,22)) + t1 + Maj */
movdqa %xmm7, %xmm2
psrld $2, %xmm7
movdqa %xmm7, %xmm1
pslld $10, %xmm2
psrld $11, %xmm1
pxor %xmm2, %xmm7
pslld $9, %xmm2
pxor %xmm1, %xmm7
psrld $9, %xmm1
pxor %xmm2, %xmm7
pslld $11, %xmm2
pxor %xmm1, %xmm7
pxor %xmm2, %xmm7
paddd %xmm6, %xmm7
.endm
/*
 * sha256_sse2_main_quadround i
 *
 * Expands to four consecutive sha256_sse2_main_round invocations, for
 * rounds i+0, i+1, i+2 and i+3 in that order.
 */
.macro sha256_sse2_main_quadround i
.irp step, 0, 1, 2, 3
sha256_sse2_main_round \i+\step
.endr
.endm
/*
 * p2bswap_esi_esp i
 *
 * Loads the two 16-byte vectors at slots i and i+1 of the (possibly
 * unaligned) buffer at %esi, reverses the byte order of every 32-bit word,
 * and stores the results to slots i+3 and i+4 of the aligned area at %esp.
 * Clobbers %xmm0-%xmm3.
 */
.macro p2bswap_esi_esp i
movdqu \i*16(%esi), %xmm0
movdqu (\i+1)*16(%esi), %xmm2
/* Swap the two 16-bit halves of every 32-bit word */
pshuflw $0xb1, %xmm0, %xmm0
pshuflw $0xb1, %xmm2, %xmm2
pshufhw $0xb1, %xmm0, %xmm0
pshufhw $0xb1, %xmm2, %xmm2
/* Swap the two bytes of every 16-bit word: (x >> 8) ^ (x << 8) */
movdqa %xmm0, %xmm1
movdqa %xmm2, %xmm3
psrlw $8, %xmm1
psrlw $8, %xmm3
psllw $8, %xmm0
psllw $8, %xmm2
pxor %xmm1, %xmm0
pxor %xmm3, %xmm2
movdqa %xmm0, (\i+3)*16(%esp)
movdqa %xmm2, (\i+4)*16(%esp)
.endm
.text
.p2align 5
.globl sha256_transform_4way
.globl _sha256_transform_4way
/*
 * sha256_transform_4way (i386, SSE2): one SHA-256 compression on four
 * interleaved lanes.
 *
 * Stack arguments, in order:
 *   1. state buffer (%edi): eight 16-byte vectors, read and updated in place
 *   2. message block (%esi): sixteen 16-byte vectors, read only
 *   3. swap flag (%ecx): when non-zero every 32-bit message word is
 *      byte-swapped while it is copied
 * Both buffers are accessed with unaligned loads/stores.
 *
 * Scratch layout at the 128-byte aligned %esp:
 *   slots 0-2   state words 5, 6 and 7 (spilled, rotated every round)
 *   slots 3-66  the 64-entry message schedule
 * %edi and %esi are preserved; %edx keeps the caller's stack pointer.
 */
sha256_transform_4way:
_sha256_transform_4way:
pushl %edi
pushl %esi
/* Two registers were pushed, so the first argument is at 12(%esp) */
movl 12(%esp), %edi
movl 16(%esp), %esi
movl 20(%esp), %ecx
movl %esp, %edx
subl $67*16, %esp
andl $-128, %esp
testl %ecx, %ecx
jnz sha256_transform_4way_swap
/* No swap requested: copy the sixteen message vectors to schedule slots 0-15 */
movdqu 0*16(%esi), %xmm0
movdqu 1*16(%esi), %xmm1
movdqu 2*16(%esi), %xmm2
movdqu 3*16(%esi), %xmm3
movdqu 4*16(%esi), %xmm4
movdqu 5*16(%esi), %xmm5
movdqu 6*16(%esi), %xmm6
movdqu 7*16(%esi), %xmm7
movdqa %xmm0, 3*16(%esp)
movdqa %xmm1, 4*16(%esp)
movdqa %xmm2, 5*16(%esp)
movdqa %xmm3, 6*16(%esp)
movdqa %xmm4, 7*16(%esp)
movdqa %xmm5, 8*16(%esp)
movdqa %xmm6, 9*16(%esp)
movdqa %xmm7, 10*16(%esp)
movdqu 8*16(%esi), %xmm0
movdqu 9*16(%esi), %xmm1
movdqu 10*16(%esi), %xmm2
movdqu 11*16(%esi), %xmm3
movdqu 12*16(%esi), %xmm4
movdqu 13*16(%esi), %xmm5
movdqu 14*16(%esi), %xmm6
movdqu 15*16(%esi), %xmm7
movdqa %xmm0, 11*16(%esp)
movdqa %xmm1, 12*16(%esp)
movdqa %xmm2, 13*16(%esp)
movdqa %xmm3, 14*16(%esp)
movdqa %xmm4, 15*16(%esp)
movdqa %xmm5, 16*16(%esp)
movdqa %xmm6, 17*16(%esp)
movdqa %xmm7, 18*16(%esp)
jmp sha256_transform_4way_extend
.p2align 5
/* Swap requested: same copy, two vectors at a time, with per-word byte reversal */
sha256_transform_4way_swap:
p2bswap_esi_esp 0
p2bswap_esi_esp 2
p2bswap_esi_esp 4
p2bswap_esi_esp 6
p2bswap_esi_esp 8
p2bswap_esi_esp 10
p2bswap_esi_esp 12
p2bswap_esi_esp 14
/*
 * Message-schedule extension: %ecx walks from schedule entry 16 to the end
 * marker in %eax (entry 64), two entries per iteration. %xmm3/%xmm7 carry
 * the two most recent entries between iterations.
 */
sha256_transform_4way_extend:
leal 19*16(%esp), %ecx
leal 48*16(%ecx), %eax
movdqa -2*16(%ecx), %xmm3
movdqa -1*16(%ecx), %xmm7
sha256_transform_4way_extend_loop:
/* xmm0 / xmm4 = s0 of the entries 15 and 14 positions back */
movdqa -15*16(%ecx), %xmm0
movdqa -14*16(%ecx), %xmm4
movdqa %xmm0, %xmm2
movdqa %xmm4, %xmm6
psrld $3, %xmm0
psrld $3, %xmm4
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm5
pslld $14, %xmm2
pslld $14, %xmm6
psrld $4, %xmm1
psrld $4, %xmm5
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
psrld $11, %xmm1
psrld $11, %xmm5
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
pslld $11, %xmm2
pslld $11, %xmm6
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
paddd -16*16(%ecx), %xmm0
paddd -15*16(%ecx), %xmm4
/* xmm3 / xmm7 = s1 of the two previous entries, plus the terms above */
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
paddd -7*16(%ecx), %xmm0
paddd -6*16(%ecx), %xmm4
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, (%ecx)
movdqa %xmm7, 16(%ecx)
addl $2*16, %ecx
cmpl %ecx, %eax
jne sha256_transform_4way_extend_loop
/*
 * Load the state: words 0-4 into %xmm7, %xmm5, %xmm4, %xmm3, %xmm0 and
 * words 5-7 into the three spill slots at 0/16/32(%esp).
 */
movdqu 0(%edi), %xmm7
movdqu 16(%edi), %xmm5
movdqu 32(%edi), %xmm4
movdqu 48(%edi), %xmm3
movdqu 64(%edi), %xmm0
movdqu 80(%edi), %xmm1
movdqu 96(%edi), %xmm2
movdqu 112(%edi), %xmm6
movdqa %xmm1, 0(%esp)
movdqa %xmm2, 16(%esp)
movdqa %xmm6, 32(%esp)
/* 64 rounds, one per iteration; %eax is the byte offset (16 * round) into the schedule and sha256_4k */
xorl %eax, %eax
sha256_transform_4way_main_loop:
movdqa 3*16(%esp, %eax), %xmm6
paddd sha256_4k(%eax), %xmm6
paddd 32(%esp), %xmm6
/* xmm1 = Ch(e, f, g); rotate the spilled words: h <- g, g <- f, f <- e */
movdqa %xmm0, %xmm1
movdqa 16(%esp), %xmm2
pandn %xmm2, %xmm1
movdqa %xmm2, 32(%esp)
movdqa 0(%esp), %xmm2
movdqa %xmm2, 16(%esp)
pand %xmm0, %xmm2
pxor %xmm2, %xmm1
movdqa %xmm0, 0(%esp)
paddd %xmm1, %xmm6
/* xmm0 = ror(e,6) ^ ror(e,11) ^ ror(e,25) */
movdqa %xmm0, %xmm1
psrld $6, %xmm0
movdqa %xmm0, %xmm2
pslld $7, %xmm1
psrld $5, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $14, %xmm1
psrld $14, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $5, %xmm1
pxor %xmm1, %xmm0
paddd %xmm0, %xmm6
/* new e = d + t1 */
movdqa %xmm3, %xmm0
paddd %xmm6, %xmm0
/* xmm1 = Maj(a, b, c); d <- c, c <- b, b <- a */
movdqa %xmm5, %xmm1
movdqa %xmm4, %xmm3
movdqa %xmm4, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm4
pand %xmm7, %xmm1
pxor %xmm4, %xmm1
movdqa %xmm5, %xmm4
movdqa %xmm7, %xmm5
pxor %xmm2, %xmm1
paddd %xmm1, %xmm6
/* new a = (ror(a,2) ^ ror(a,13) ^ ror(a,22)) + t1 + Maj */
movdqa %xmm7, %xmm2
psrld $2, %xmm7
movdqa %xmm7, %xmm1
pslld $10, %xmm2
psrld $11, %xmm1
pxor %xmm2, %xmm7
pxor %xmm1, %xmm7
pslld $9, %xmm2
psrld $9, %xmm1
pxor %xmm2, %xmm7
pxor %xmm1, %xmm7
pslld $11, %xmm2
pxor %xmm2, %xmm7
paddd %xmm6, %xmm7
addl $16, %eax
cmpl $16*64, %eax
jne sha256_transform_4way_main_loop
/* Add the working state to the stored state and write it back */
movdqu 0(%edi), %xmm1
movdqu 16(%edi), %xmm2
paddd %xmm1, %xmm7
paddd %xmm2, %xmm5
movdqu 32(%edi), %xmm1
movdqu 48(%edi), %xmm2
paddd %xmm1, %xmm4
paddd %xmm2, %xmm3
movdqu %xmm7, 0(%edi)
movdqu %xmm5, 16(%edi)
movdqu %xmm4, 32(%edi)
movdqu %xmm3, 48(%edi)
movdqu 64(%edi), %xmm1
movdqu 80(%edi), %xmm2
movdqu 96(%edi), %xmm6
movdqu 112(%edi), %xmm7
paddd %xmm1, %xmm0
paddd 0(%esp), %xmm2
paddd 16(%esp), %xmm6
paddd 32(%esp), %xmm7
movdqu %xmm0, 64(%edi)
movdqu %xmm2, 80(%edi)
movdqu %xmm6, 96(%edi)
movdqu %xmm7, 112(%edi)
/* Release the scratch area and restore the saved registers */
movl %edx, %esp
popl %esi
popl %edi
ret
.text
.p2align 5
.globl sha256d_ms_4way
.globl _sha256d_ms_4way
sha256d_ms_4way:
_sha256d_ms_4way:
pushl %edi
pushl %esi
pushl %ebp
movl 16(%esp), %edi
movl 20(%esp), %esi
movl 24(%esp), %edx
movl 28(%esp), %ecx
movl %esp, %ebp
subl $67*16, %esp
andl $-128, %esp
leal 256(%esi), %eax
sha256d_ms_4way_extend_loop1:
movdqa 3*16(%esi), %xmm0
movdqa 2*16(%eax), %xmm3
movdqa 3*16(%eax), %xmm7
movdqa %xmm3, 5*16(%esp)
movdqa %xmm7, 6*16(%esp)
movdqa %xmm0, %xmm2
paddd %xmm0, %xmm7
psrld $3, %xmm0
movdqa %xmm0, %xmm1
pslld $14, %xmm2
psrld $4, %xmm1
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
psrld $11, %xmm1
pslld $11, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
paddd %xmm0, %xmm3
movdqa %xmm3, 2*16(%eax)
movdqa %xmm7, 3*16(%eax)
movdqa 4*16(%eax), %xmm0
movdqa %xmm0, 7*16(%esp)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
movdqa %xmm3, 4*16(%eax)
movdqa %xmm7, 5*16(%eax)
movdqa 6*16(%eax), %xmm0
movdqa 7*16(%eax), %xmm4
movdqa %xmm0, 9*16(%esp)
movdqa %xmm4, 10*16(%esp)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 6*16(%eax)
movdqa %xmm7, 7*16(%eax)
movdqa 8*16(%eax), %xmm0
movdqa 2*16(%eax), %xmm4
movdqa %xmm0, 11*16(%esp)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 8*16(%eax)
movdqa %xmm7, 9*16(%eax)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd 3*16(%eax), %xmm3
paddd 4*16(%eax), %xmm7
movdqa %xmm3, 10*16(%eax)
movdqa %xmm7, 11*16(%eax)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd 5*16(%eax), %xmm3
paddd 6*16(%eax), %xmm7
movdqa %xmm3, 12*16(%eax)
movdqa %xmm7, 13*16(%eax)
movdqa 14*16(%eax), %xmm0
movdqa 15*16(%eax), %xmm4
movdqa %xmm0, 17*16(%esp)
movdqa %xmm4, 18*16(%esp)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
paddd 7*16(%eax), %xmm0
paddd 8*16(%eax), %xmm4
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 14*16(%eax)
movdqa %xmm7, 15*16(%eax)
sha256d_ms_4way_extend_loop2:
sha256_sse2_extend_doubleround 16
sha256_sse2_extend_doubleround 18
sha256_sse2_extend_doubleround 20
sha256_sse2_extend_doubleround 22
sha256_sse2_extend_doubleround 24
sha256_sse2_extend_doubleround 26
sha256_sse2_extend_doubleround 28
sha256_sse2_extend_doubleround 30
sha256_sse2_extend_doubleround 32
sha256_sse2_extend_doubleround 34
sha256_sse2_extend_doubleround 36
sha256_sse2_extend_doubleround 38
sha256_sse2_extend_doubleround 40
sha256_sse2_extend_doubleround 42
jz sha256d_ms_4way_extend_coda2
sha256_sse2_extend_doubleround 44
sha256_sse2_extend_doubleround 46
movdqa 0(%ecx), %xmm3
movdqa 16(%ecx), %xmm0
movdqa 32(%ecx), %xmm1
movdqa 48(%ecx), %xmm2
movdqa 64(%ecx), %xmm6
movdqa 80(%ecx), %xmm7
movdqa 96(%ecx), %xmm5
movdqa 112(%ecx), %xmm4
movdqa %xmm1, 0(%esp)
movdqa %xmm2, 16(%esp)
movdqa %xmm6, 32(%esp)
movl %esi, %eax
jmp sha256d_ms_4way_main_loop1
sha256d_ms_4way_main_loop2:
sha256_sse2_main_round 0
sha256_sse2_main_round 1
sha256_sse2_main_round 2
sha256d_ms_4way_main_loop1:
sha256_sse2_main_round 3
sha256_sse2_main_quadround 4
sha256_sse2_main_quadround 8
sha256_sse2_main_quadround 12
sha256_sse2_main_quadround 16
sha256_sse2_main_quadround 20
sha256_sse2_main_quadround 24
sha256_sse2_main_quadround 28
sha256_sse2_main_quadround 32
sha256_sse2_main_quadround 36
sha256_sse2_main_quadround 40
sha256_sse2_main_quadround 44
sha256_sse2_main_quadround 48
sha256_sse2_main_quadround 52
sha256_sse2_main_round 56
jz sha256d_ms_4way_finish
sha256_sse2_main_round 57
sha256_sse2_main_round 58
sha256_sse2_main_round 59
sha256_sse2_main_quadround 60
movdqa 5*16(%esp), %xmm1
movdqa 6*16(%esp), %xmm2
movdqa 7*16(%esp), %xmm6
movdqa %xmm1, 18*16(%esi)
movdqa %xmm2, 19*16(%esi)
movdqa %xmm6, 20*16(%esi)
movdqa 9*16(%esp), %xmm1
movdqa 10*16(%esp), %xmm2
movdqa 11*16(%esp), %xmm6
movdqa %xmm1, 22*16(%esi)
movdqa %xmm2, 23*16(%esi)
movdqa %xmm6, 24*16(%esi)
movdqa 17*16(%esp), %xmm1
movdqa 18*16(%esp), %xmm2
movdqa %xmm1, 30*16(%esi)
movdqa %xmm2, 31*16(%esi)
movdqa 0(%esp), %xmm1
movdqa 16(%esp), %xmm2
movdqa 32(%esp), %xmm6
paddd 0(%edx), %xmm7
paddd 16(%edx), %xmm5
paddd 32(%edx), %xmm4
paddd 48(%edx), %xmm3
paddd 64(%edx), %xmm0
paddd 80(%edx), %xmm1
paddd 96(%edx), %xmm2
paddd 112(%edx), %xmm6
movdqa %xmm7, 48+0(%esp)
movdqa %xmm5, 48+16(%esp)
movdqa %xmm4, 48+32(%esp)
movdqa %xmm3, 48+48(%esp)
movdqa %xmm0, 48+64(%esp)
movdqa %xmm1, 48+80(%esp)
movdqa %xmm2, 48+96(%esp)
movdqa %xmm6, 48+112(%esp)
movdqa sha256d_4preext2_15, %xmm1
movdqa sha256d_4preext2_24, %xmm2
pxor %xmm0, %xmm0
movdqa %xmm2, 48+128(%esp)
movdqa %xmm0, 48+144(%esp)
movdqa %xmm0, 48+160(%esp)
movdqa %xmm0, 48+176(%esp)
movdqa %xmm0, 48+192(%esp)
movdqa %xmm0, 48+208(%esp)
movdqa %xmm0, 48+224(%esp)
movdqa %xmm1, 48+240(%esp)
leal 19*16(%esp), %eax
cmpl %eax, %eax
movdqa -15*16(%eax), %xmm0
movdqa -14*16(%eax), %xmm4
movdqa %xmm0, %xmm2
movdqa %xmm4, %xmm6
psrld $3, %xmm0
psrld $3, %xmm4
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm5
pslld $14, %xmm2
pslld $14, %xmm6
psrld $4, %xmm1
psrld $4, %xmm5
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
psrld $11, %xmm1
psrld $11, %xmm5
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
pslld $11, %xmm2
pslld $11, %xmm6
pxor %xmm1, %xmm0
pxor %xmm5, %xmm4
pxor %xmm2, %xmm0
pxor %xmm6, %xmm4
paddd -16*16(%eax), %xmm0
paddd -15*16(%eax), %xmm4
paddd sha256d_4preext2_17, %xmm4
movdqa %xmm0, %xmm3
movdqa %xmm4, %xmm7
movdqa %xmm3, 0*16(%eax)
movdqa %xmm7, 1*16(%eax)
sha256_sse2_extend_doubleround 2
sha256_sse2_extend_doubleround 4
movdqa -9*16(%eax), %xmm0
movdqa sha256d_4preext2_23, %xmm4
movdqa %xmm0, %xmm2
psrld $3, %xmm0
movdqa %xmm0, %xmm1
pslld $14, %xmm2
psrld $4, %xmm1
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
psrld $11, %xmm1
pslld $11, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
paddd -10*16(%eax), %xmm0
paddd -9*16(%eax), %xmm4
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
paddd -1*16(%eax), %xmm0
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
paddd 0*16(%eax), %xmm4
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 6*16(%eax)
movdqa %xmm7, 7*16(%eax)
movdqa sha256d_4preext2_24, %xmm0
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
paddd 1*16(%eax), %xmm0
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd 2*16(%eax), %xmm7
movdqa %xmm3, 8*16(%eax)
movdqa %xmm7, 9*16(%eax)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd 3*16(%eax), %xmm3
paddd 4*16(%eax), %xmm7
movdqa %xmm3, 10*16(%eax)
movdqa %xmm7, 11*16(%eax)
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd 5*16(%eax), %xmm3
paddd 6*16(%eax), %xmm7
movdqa %xmm3, 12*16(%eax)
movdqa %xmm7, 13*16(%eax)
movdqa sha256d_4preext2_30, %xmm0
movdqa 0*16(%eax), %xmm4
movdqa %xmm4, %xmm6
psrld $3, %xmm4
movdqa %xmm4, %xmm5
pslld $14, %xmm6
psrld $4, %xmm5
pxor %xmm5, %xmm4
pxor %xmm6, %xmm4
psrld $11, %xmm5
pslld $11, %xmm6
pxor %xmm5, %xmm4
pxor %xmm6, %xmm4
paddd -1*16(%eax), %xmm4
movdqa %xmm3, %xmm2
movdqa %xmm7, %xmm6
psrld $10, %xmm3
psrld $10, %xmm7
movdqa %xmm3, %xmm1
movdqa %xmm7, %xmm5
paddd 7*16(%eax), %xmm0
pslld $13, %xmm2
pslld $13, %xmm6
psrld $7, %xmm1
psrld $7, %xmm5
paddd 8*16(%eax), %xmm4
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
psrld $2, %xmm1
psrld $2, %xmm5
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
pslld $2, %xmm2
pslld $2, %xmm6
pxor %xmm1, %xmm3
pxor %xmm5, %xmm7
pxor %xmm2, %xmm3
pxor %xmm6, %xmm7
paddd %xmm0, %xmm3
paddd %xmm4, %xmm7
movdqa %xmm3, 14*16(%eax)
movdqa %xmm7, 15*16(%eax)
jmp sha256d_ms_4way_extend_loop2
sha256d_ms_4way_extend_coda2:
sha256_sse2_extend_round 44
movdqa sha256_4h+0, %xmm7
movdqa sha256_4h+16, %xmm5
movdqa sha256_4h+32, %xmm4
movdqa sha256_4h+48, %xmm3
movdqa sha256_4h+64, %xmm0
movdqa sha256_4h+80, %xmm1
movdqa sha256_4h+96, %xmm2
movdqa sha256_4h+112, %xmm6
movdqa %xmm1, 0(%esp)
movdqa %xmm2, 16(%esp)
movdqa %xmm6, 32(%esp)
leal 48(%esp), %eax
jmp sha256d_ms_4way_main_loop2
/*
 * Reduced SHA-256 round, used for rounds 57..60 in sha256d_ms_4way_finish.
 * Only the "e" half of a round is evaluated:
 *
 *     new_e = d + h + S1(e) + Ch(e, f, g) + K[i] + W[i]
 *
 * Every operand is an SSE2 register or 16-byte slot holding four 32-bit lanes.
 *
 * Arguments:
 *   i  - round index; selects the slot 16*i(%eax) and the constant at
 *        sha256_4k + 16*i (presumably W[i] and K[i]; both tables are set up
 *        outside this macro)
 *   r7 - register holding the value that plays the role of "d"
 *
 * On entry:  %xmm0 = e, 0(%esp) = f, 16(%esp) = g, 32(%esp) = h
 * On exit:   %xmm0 = new_e, 0(%esp) = e, 16(%esp) = f, 32(%esp) = g
 * Clobbers:  %xmm1, %xmm2, %xmm6
 */
.macro sha256_sse2_main_round_red i, r7
movdqa 16*(\i)(%eax), %xmm6 /* xmm6 = slot i of the buffer at %eax */
paddd 16*(\i)+sha256_4k, %xmm6 /* + constant i from sha256_4k */
paddd 32(%esp), %xmm6 /* + h */
movdqa %xmm0, %xmm1
movdqa 16(%esp), %xmm2 /* xmm2 = g */
paddd \r7, %xmm6 /* + d */
pandn %xmm2, %xmm1 /* xmm1 = ~e & g */
movdqa %xmm2, 32(%esp) /* g moves into the h slot */
movdqa 0(%esp), %xmm2 /* xmm2 = f */
movdqa %xmm2, 16(%esp) /* f moves into the g slot */
pand %xmm0, %xmm2 /* xmm2 = e & f */
pxor %xmm2, %xmm1 /* xmm1 = Ch(e, f, g) */
movdqa %xmm0, 0(%esp) /* e moves into the f slot */
paddd %xmm1, %xmm6 /* + Ch(e, f, g) */
/*
 * S1(e) = ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25), assembled from shifts:
 * xmm0 accumulates the result, xmm1 carries the left-shifted copies
 * (by 7, 21, 26) and xmm2 the right-shifted copies (by 11, 25).
 */
movdqa %xmm0, %xmm1
psrld $6, %xmm0
movdqa %xmm0, %xmm2
pslld $7, %xmm1
psrld $5, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $14, %xmm1
psrld $14, %xmm2
pxor %xmm1, %xmm0
pxor %xmm2, %xmm0
pslld $5, %xmm1
pxor %xmm1, %xmm0
paddd %xmm6, %xmm0 /* xmm0 = S1(e) + everything accumulated in xmm6 */
.endm
sha256d_ms_4way_finish:
sha256_sse2_main_round_red 57, %xmm3
sha256_sse2_main_round_red 58, %xmm4
sha256_sse2_main_round_red 59, %xmm5
sha256_sse2_main_round_red 60, %xmm7
paddd sha256_4h+112, %xmm0
movdqa %xmm0, 112(%edi)
movl %ebp, %esp
popl %ebp
popl %esi
popl %edi
ret
.text
.p2align 5
.globl sha256_use_4way
.globl _sha256_use_4way
/*
 * sha256_use_4way: report whether the SSE2 4-way code path can be used.
 *
 * Takes no arguments. Returns 1 in %eax when CPUID leaf 1 reports bit 26 of
 * %edx (mask 0x04000000, the SSE2 feature flag) as set, 0 otherwise.
 * %ebx is saved and restored around CPUID because CPUID overwrites it;
 * %ecx and %edx are left clobbered.
 * The symbol is exported both with and without a leading underscore.
 */
sha256_use_4way:
_sha256_use_4way:
pushl %ebx
/* Check for SSE2 availability */
movl $1, %eax
cpuid
andl $0x04000000, %edx
jnz sha256_use_4way_sse2
/* SSE2 flag clear: return 0 */
xorl %eax, %eax
popl %ebx
ret
sha256_use_4way_sse2:
/* SSE2 flag set: return 1 */
movl $1, %eax
popl %ebx
ret
#endif
0707010000002C000081A4000003E800000064000000015EF4BCA100003EC7000000000000000000000000000000000000001600000000cpuminer-2.5.1/sha2.c/*
* Copyright 2011 ArtForz
* Copyright 2011-2013 pooler
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#include "miner.h"
#include <string.h>
#include <inttypes.h>
#if defined(USE_ASM) && \
(defined(__x86_64__) || \
(defined(__arm__) && defined(__APCS_32__)) || \
(defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)))
#define EXTERN_SHA256
#endif
/*
 * SHA-256 initial hash value (eight 32-bit words).
 * Copied into a state array by sha256_init(); sha256_h[7] is also added
 * directly at the end of sha256d_ms().
 */
static const uint32_t sha256_h[8] = {
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};
/*
 * SHA-256 round constants, one per round (64 words).
 * Round i adds sha256_k[i] to the message-schedule word (see RNDr).
 */
static const uint32_t sha256_k[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/*
 * Reset a SHA-256 state to the initial hash value.
 *
 * state - destination for the eight 32-bit words of sha256_h.
 */
void sha256_init(uint32_t *state)
{
	memcpy(state, sha256_h, sizeof(sha256_h));
}
/*
 * Elementary functions used by SHA256.
 *
 * All operands are expected to be 32-bit unsigned values.  Every macro
 * argument is parenthesized so that compound expressions (e.g. "a | b")
 * can be passed safely.  Arguments may be evaluated more than once, so they
 * must not have side effects.
 */
#define Ch(x, y, z)     (((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)    (((x) & ((y) | (z))) | ((y) & (z)))
#define ROTR(x, n)      (((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ ((x) >> 3))
#define s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ ((x) >> 10))

/*
 * SHA256 round function.
 *
 * a..h are the eight working variables (d and h are written, so they must be
 * lvalues); k is the already-summed "round constant + schedule word".
 * The caller must have uint32_t temporaries named t0 and t1 in scope.
 */
#define RND(a, b, c, d, e, f, g, h, k) \
	do { \
		t0 = (h) + S1(e) + Ch(e, f, g) + (k); \
		t1 = S0(a) + Maj(a, b, c); \
		(d) += t0; \
		(h) = t0 + t1; \
	} while (0)

/*
 * Adjusted round function for rotating state: instead of shuffling the eight
 * working variables after each round, round i addresses them through indices
 * rotated by i within S[0..7].
 */
#define RNDr(S, W, i) \
	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8], \
	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8], \
	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8], \
	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8], \
	    (W)[i] + sha256_k[i])
#ifndef EXTERN_SHA256
/*
 * SHA256 block compression function.
 *
 * state - 8-word (256-bit) chaining value, updated in place
 * block - 16-word (512-bit) input block
 * swap  - when non-zero, each input word is passed through swab32() before
 *         use; otherwise the words are taken as they are
 */
void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
{
	uint32_t W[64];
	uint32_t S[8];
	uint32_t t0, t1;
	int i;

	/* 1. Prepare message schedule W: load 16 words, then expand to 64. */
	if (!swap) {
		memcpy(W, block, 16 * sizeof(uint32_t));
	} else {
		for (i = 0; i < 16; i++)
			W[i] = swab32(block[i]);
	}
	for (i = 16; i < 64; i++)
		W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];

	/* 2. Initialize working variables from the current state. */
	memcpy(S, state, sizeof(S));

	/* 3. Mix: 64 rounds, kept fully unrolled with constant indices. */
	RNDr(S, W,  0); RNDr(S, W,  1); RNDr(S, W,  2); RNDr(S, W,  3);
	RNDr(S, W,  4); RNDr(S, W,  5); RNDr(S, W,  6); RNDr(S, W,  7);
	RNDr(S, W,  8); RNDr(S, W,  9); RNDr(S, W, 10); RNDr(S, W, 11);
	RNDr(S, W, 12); RNDr(S, W, 13); RNDr(S, W, 14); RNDr(S, W, 15);
	RNDr(S, W, 16); RNDr(S, W, 17); RNDr(S, W, 18); RNDr(S, W, 19);
	RNDr(S, W, 20); RNDr(S, W, 21); RNDr(S, W, 22); RNDr(S, W, 23);
	RNDr(S, W, 24); RNDr(S, W, 25); RNDr(S, W, 26); RNDr(S, W, 27);
	RNDr(S, W, 28); RNDr(S, W, 29); RNDr(S, W, 30); RNDr(S, W, 31);
	RNDr(S, W, 32); RNDr(S, W, 33); RNDr(S, W, 34); RNDr(S, W, 35);
	RNDr(S, W, 36); RNDr(S, W, 37); RNDr(S, W, 38); RNDr(S, W, 39);
	RNDr(S, W, 40); RNDr(S, W, 41); RNDr(S, W, 42); RNDr(S, W, 43);
	RNDr(S, W, 44); RNDr(S, W, 45); RNDr(S, W, 46); RNDr(S, W, 47);
	RNDr(S, W, 48); RNDr(S, W, 49); RNDr(S, W, 50); RNDr(S, W, 51);
	RNDr(S, W, 52); RNDr(S, W, 53); RNDr(S, W, 54); RNDr(S, W, 55);
	RNDr(S, W, 56); RNDr(S, W, 57); RNDr(S, W, 58); RNDr(S, W, 59);
	RNDr(S, W, 60); RNDr(S, W, 61); RNDr(S, W, 62); RNDr(S, W, 63);

	/* 4. Mix local working variables into global state. */
	for (i = 0; i < 8; i++)
		state[i] += S[i];
}
#endif /* EXTERN_SHA256 */
/*
 * Template for the 16-word input block of the second SHA-256 pass.
 * Only words 8..15 are ever copied out (memcpy(S + 8, sha256d_hash1 + 8, 32)):
 * word 8 is the 0x80000000 padding marker and word 15 is 0x100, i.e. a
 * message length of 256 bits. Words 0..7 are placeholders for the first-pass
 * digest, which the callers supply themselves.
 */
static const uint32_t sha256d_hash1[16] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x80000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000100
};
/*
 * Full double SHA-256 over the two 16-word blocks at data[0..31], with the
 * eight result words byte-swapped.
 *
 * hash - receives the 8 output words
 * data - 32 input words; both blocks are compressed without byte swapping,
 *        so the second block must already contain its padding
 */
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
{
	uint32_t inner[16];
	int k;

	/* First pass: two compression calls over the input. */
	sha256_init(inner);
	sha256_transform(inner, &data[0], 0);
	sha256_transform(inner, &data[16], 0);

	/* Second pass: digest of the first pass plus the fixed padding words. */
	memcpy(&inner[8], &sha256d_hash1[8], 32);
	sha256_init(hash);
	sha256_transform(hash, inner, 0);

	k = 0;
	while (k < 8) {
		hash[k] = swab32(hash[k]);
		k++;
	}
}
/*
 * Generic double SHA-256 of an arbitrary byte string.
 *
 * hash - receives the 32-byte result (written with be32enc, word by word)
 * data - input bytes
 * len  - number of input bytes
 */
void sha256d(unsigned char *hash, const unsigned char *data, int len)
{
	uint32_t state[16], chunk[16];
	int word, left;

	sha256_init(state);

	/*
	 * "left" is the number of input bytes not yet consumed.  The loop keeps
	 * going while it is above -9 so that one extra, data-free block is
	 * processed when the 0x80 marker and the length do not fit in the
	 * block that holds the last input bytes (56 <= left < 64).
	 */
	left = len;
	while (left > -9) {
		int ncopy;

		if (left > 64)
			ncopy = 64;
		else if (left < 0)
			ncopy = 0;
		else
			ncopy = left;

		if (left < 64)
			memset(chunk, 0, 64);
		memcpy(chunk, data + len - left, ncopy);
		if (left >= 0 && left < 64)
			((unsigned char *)chunk)[left] = 0x80;	/* padding marker */
		for (word = 0; word < 16; word++)
			chunk[word] = be32dec(chunk + word);
		if (left < 56)
			chunk[15] = 8 * len;			/* length in bits */
		sha256_transform(state, chunk, 0);

		left -= 64;
	}

	/* Second pass over the 32-byte digest plus fixed padding. */
	memcpy(state + 8, sha256d_hash1 + 8, 32);
	sha256_init(chunk);
	sha256_transform(chunk, state, 0);
	for (word = 0; word < 8; word++)
		be32enc((uint32_t *)hash + word, chunk[word]);
}
/*
 * Precompute the parts of schedule words W[16..31] that do not involve W[3].
 *
 * W[0..15] must hold the input block on entry.  Compared with the regular
 * SHA-256 expansion W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16], the
 * terms that depend on W[3] or on not-yet-final words are left out here;
 * sha256d_ms() adds them afterwards.
 */
static inline void sha256d_preextend(uint32_t *W)
{
	int i;

	W[16] = W[ 0] + s0(W[ 1]) + W[ 9] + s1(W[14]);
	W[17] = W[ 1] + s0(W[ 2]) + W[10] + s1(W[15]);
	W[18] = W[ 2] + W[11] + s1(W[16]);
	W[19] = s0(W[ 4]) + W[12] + s1(W[17]);

	/* W[20..24]: everything except the s1() term */
	for (i = 20; i <= 24; i++)
		W[i] = W[i - 16] + s0(W[i - 15]) + W[i - 7];

	/* W[25..31]: only the s0() and W[i-16] terms */
	for (i = 25; i <= 31; i++)
		W[i] = W[i - 16] + s0(W[i - 15]);
}
/*
 * Run SHA-256 rounds 0, 1 and 2 on the working state S using schedule
 * words W[0..2].  S is updated in place.
 */
static inline void sha256d_prehash(uint32_t *S, const uint32_t *W)
{
	uint32_t t0, t1;
	int round;

	for (round = 0; round < 3; round++)
		RNDr(S, W, round);
}
#ifdef EXTERN_SHA256
void sha256d_ms(uint32_t *hash, uint32_t *W,
const uint32_t *midstate, const uint32_t *prehash);
#else
/*
 * Portable C implementation of the per-nonce part of double SHA-256
 * (compiled only when EXTERN_SHA256 is not defined).
 *
 * hash     - 8-word output. Rounds 57..60 of the second pass only update the
 *            "d" variable and rounds 61..63 are not executed, so only hash[7]
 *            ends up as a finished digest word (it is the only one that
 *            scanhash_sha256d() inspects).
 * W        - schedule buffer. On entry W[0..15] holds the block (with the
 *            value to test in W[3]) and W[16..31] the partial expansion from
 *            sha256d_preextend(). W[16..63] is used as scratch; the eight
 *            pre-extended entries that get modified are restored before
 *            returning so the buffer can be reused.
 * midstate - 8 words added to the working state after round 63 of the first
 *            pass.
 * prehash  - 8 words used as the working state entering round 3 (the state
 *            after sha256d_prehash() in the visible callers).
 */
static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
const uint32_t *midstate, const uint32_t *prehash)
{
uint32_t S[64];
uint32_t t0, t1;
int i;
/* Stash the pre-extended words that are about to be modified in place. */
S[18] = W[18];
S[19] = W[19];
S[20] = W[20];
S[22] = W[22];
S[23] = W[23];
S[24] = W[24];
S[30] = W[30];
S[31] = W[31];
/* Complete W[18..31] by adding the terms sha256d_preextend() left out. */
W[18] += s0(W[3]);
W[19] += W[3];
W[20] += s1(W[18]);
W[21] = s1(W[19]);
W[22] += s1(W[20]);
W[23] += s1(W[21]);
W[24] += s1(W[22]);
W[25] = s1(W[23]) + W[18];
W[26] = s1(W[24]) + W[19];
W[27] = s1(W[25]) + W[20];
W[28] = s1(W[26]) + W[21];
W[29] = s1(W[27]) + W[22];
W[30] += s1(W[28]) + W[23];
W[31] += s1(W[29]) + W[24];
/* Regular expansion for the remaining schedule words. */
for (i = 32; i < 64; i += 2) {
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
}
/* First pass: resume from the state after rounds 0..2 and run rounds 3..63. */
memcpy(S, prehash, 32);
RNDr(S, W, 3);
RNDr(S, W, 4);
RNDr(S, W, 5);
RNDr(S, W, 6);
RNDr(S, W, 7);
RNDr(S, W, 8);
RNDr(S, W, 9);
RNDr(S, W, 10);
RNDr(S, W, 11);
RNDr(S, W, 12);
RNDr(S, W, 13);
RNDr(S, W, 14);
RNDr(S, W, 15);
RNDr(S, W, 16);
RNDr(S, W, 17);
RNDr(S, W, 18);
RNDr(S, W, 19);
RNDr(S, W, 20);
RNDr(S, W, 21);
RNDr(S, W, 22);
RNDr(S, W, 23);
RNDr(S, W, 24);
RNDr(S, W, 25);
RNDr(S, W, 26);
RNDr(S, W, 27);
RNDr(S, W, 28);
RNDr(S, W, 29);
RNDr(S, W, 30);
RNDr(S, W, 31);
RNDr(S, W, 32);
RNDr(S, W, 33);
RNDr(S, W, 34);
RNDr(S, W, 35);
RNDr(S, W, 36);
RNDr(S, W, 37);
RNDr(S, W, 38);
RNDr(S, W, 39);
RNDr(S, W, 40);
RNDr(S, W, 41);
RNDr(S, W, 42);
RNDr(S, W, 43);
RNDr(S, W, 44);
RNDr(S, W, 45);
RNDr(S, W, 46);
RNDr(S, W, 47);
RNDr(S, W, 48);
RNDr(S, W, 49);
RNDr(S, W, 50);
RNDr(S, W, 51);
RNDr(S, W, 52);
RNDr(S, W, 53);
RNDr(S, W, 54);
RNDr(S, W, 55);
RNDr(S, W, 56);
RNDr(S, W, 57);
RNDr(S, W, 58);
RNDr(S, W, 59);
RNDr(S, W, 60);
RNDr(S, W, 61);
RNDr(S, W, 62);
RNDr(S, W, 63);
/* S[0..7] becomes the first-pass digest. */
for (i = 0; i < 8; i++)
S[i] += midstate[i];
/* Put the stashed pre-extended words back into W for the next call. */
W[18] = S[18];
W[19] = S[19];
W[20] = S[20];
W[22] = S[22];
W[23] = S[23];
W[24] = S[24];
W[30] = S[30];
W[31] = S[31];
/*
 * Second pass: S now serves as the message schedule. S[0..7] is the
 * first-pass digest and S[8..15] the fixed padding words, so the
 * expansion below reads those padding words straight from
 * sha256d_hash1[8..15].
 */
memcpy(S + 8, sha256d_hash1 + 8, 32);
S[16] = s1(sha256d_hash1[14]) + sha256d_hash1[ 9] + s0(S[ 1]) + S[ 0];
S[17] = s1(sha256d_hash1[15]) + sha256d_hash1[10] + s0(S[ 2]) + S[ 1];
S[18] = s1(S[16]) + sha256d_hash1[11] + s0(S[ 3]) + S[ 2];
S[19] = s1(S[17]) + sha256d_hash1[12] + s0(S[ 4]) + S[ 3];
S[20] = s1(S[18]) + sha256d_hash1[13] + s0(S[ 5]) + S[ 4];
S[21] = s1(S[19]) + sha256d_hash1[14] + s0(S[ 6]) + S[ 5];
S[22] = s1(S[20]) + sha256d_hash1[15] + s0(S[ 7]) + S[ 6];
S[23] = s1(S[21]) + S[16] + s0(sha256d_hash1[ 8]) + S[ 7];
S[24] = s1(S[22]) + S[17] + s0(sha256d_hash1[ 9]) + sha256d_hash1[ 8];
S[25] = s1(S[23]) + S[18] + s0(sha256d_hash1[10]) + sha256d_hash1[ 9];
S[26] = s1(S[24]) + S[19] + s0(sha256d_hash1[11]) + sha256d_hash1[10];
S[27] = s1(S[25]) + S[20] + s0(sha256d_hash1[12]) + sha256d_hash1[11];
S[28] = s1(S[26]) + S[21] + s0(sha256d_hash1[13]) + sha256d_hash1[12];
S[29] = s1(S[27]) + S[22] + s0(sha256d_hash1[14]) + sha256d_hash1[13];
S[30] = s1(S[28]) + S[23] + s0(sha256d_hash1[15]) + sha256d_hash1[14];
S[31] = s1(S[29]) + S[24] + s0(S[16]) + sha256d_hash1[15];
/* Schedule words past S[60] are never read, so the expansion stops there. */
for (i = 32; i < 60; i += 2) {
S[i] = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16];
S[i+1] = s1(S[i - 1]) + S[i - 6] + s0(S[i - 14]) + S[i - 15];
}
S[60] = s1(S[58]) + S[53] + s0(S[45]) + S[44];
/* Rounds 0..56 of the second pass are executed in full. */
sha256_init(hash);
RNDr(hash, S, 0);
RNDr(hash, S, 1);
RNDr(hash, S, 2);
RNDr(hash, S, 3);
RNDr(hash, S, 4);
RNDr(hash, S, 5);
RNDr(hash, S, 6);
RNDr(hash, S, 7);
RNDr(hash, S, 8);
RNDr(hash, S, 9);
RNDr(hash, S, 10);
RNDr(hash, S, 11);
RNDr(hash, S, 12);
RNDr(hash, S, 13);
RNDr(hash, S, 14);
RNDr(hash, S, 15);
RNDr(hash, S, 16);
RNDr(hash, S, 17);
RNDr(hash, S, 18);
RNDr(hash, S, 19);
RNDr(hash, S, 20);
RNDr(hash, S, 21);
RNDr(hash, S, 22);
RNDr(hash, S, 23);
RNDr(hash, S, 24);
RNDr(hash, S, 25);
RNDr(hash, S, 26);
RNDr(hash, S, 27);
RNDr(hash, S, 28);
RNDr(hash, S, 29);
RNDr(hash, S, 30);
RNDr(hash, S, 31);
RNDr(hash, S, 32);
RNDr(hash, S, 33);
RNDr(hash, S, 34);
RNDr(hash, S, 35);
RNDr(hash, S, 36);
RNDr(hash, S, 37);
RNDr(hash, S, 38);
RNDr(hash, S, 39);
RNDr(hash, S, 40);
RNDr(hash, S, 41);
RNDr(hash, S, 42);
RNDr(hash, S, 43);
RNDr(hash, S, 44);
RNDr(hash, S, 45);
RNDr(hash, S, 46);
RNDr(hash, S, 47);
RNDr(hash, S, 48);
RNDr(hash, S, 49);
RNDr(hash, S, 50);
RNDr(hash, S, 51);
RNDr(hash, S, 52);
RNDr(hash, S, 53);
RNDr(hash, S, 54);
RNDr(hash, S, 55);
RNDr(hash, S, 56);
/*
 * Rounds 57..60, reduced to the "d += h + S1(e) + Ch(e, f, g) + k" half
 * of RND. The last line also adds sha256_h[7], so hash[7] becomes the
 * final digest word 7; the other hash[] entries are left as intermediates.
 */
hash[2] += hash[6] + S1(hash[3]) + Ch(hash[3], hash[4], hash[5])
+ S[57] + sha256_k[57];
hash[1] += hash[5] + S1(hash[2]) + Ch(hash[2], hash[3], hash[4])
+ S[58] + sha256_k[58];
hash[0] += hash[4] + S1(hash[1]) + Ch(hash[1], hash[2], hash[3])
+ S[59] + sha256_k[59];
hash[7] += hash[3] + S1(hash[0]) + Ch(hash[0], hash[1], hash[2])
+ S[60] + sha256_k[60]
+ sha256_h[7];
}
#endif /* EXTERN_SHA256 */
#ifdef HAVE_SHA256_4WAY
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
const uint32_t *midstate, const uint32_t *prehash);
/*
 * Scan nonces four at a time using sha256d_ms_4way().
 *
 * thr_id      - index into work_restart[] polled for an abort request
 * pdata       - work words; pdata[19] is the first nonce to try and receives
 *               the winning (or last tried) nonce
 * ptarget     - target; ptarget[7] is used as a quick pre-filter
 * max_nonce   - scanning stops once the nonce counter reaches this value
 * hashes_done - receives the number of nonces tried
 *
 * Returns 1 when a nonce passes fulltest(), 0 otherwise.
 */
static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t data[4 * 64] __attribute__((aligned(128)));
	uint32_t hash[4 * 8] __attribute__((aligned(32)));
	uint32_t midstate[4 * 8] __attribute__((aligned(32)));
	uint32_t prehash[4 * 8] __attribute__((aligned(32)));
	const uint32_t first_nonce = pdata[19];
	const uint32_t Htarg = ptarget[7];
	uint32_t n = first_nonce - 1;
	int word, lane;

	/* Scalar setup of the second block's schedule... */
	memcpy(data, pdata + 16, 64);
	sha256d_preextend(data);
	/* ...then spread every word over 4 lanes, highest word first so that
	 * no source word is overwritten before it has been read. */
	word = 32;
	while (word-- > 0) {
		const uint32_t v = data[word];

		for (lane = 0; lane < 4; lane++)
			data[word * 4 + lane] = v;
	}

	sha256_init(midstate);
	sha256_transform(midstate, pdata, 0);
	memcpy(prehash, midstate, 32);
	sha256d_prehash(prehash, pdata + 16);
	word = 8;
	while (word-- > 0) {
		const uint32_t m = midstate[word];
		const uint32_t p = prehash[word];

		for (lane = 0; lane < 4; lane++) {
			midstate[word * 4 + lane] = m;
			prehash[word * 4 + lane] = p;
		}
	}

	do {
		/* Word 3 of each lane carries that lane's nonce. */
		for (lane = 0; lane < 4; lane++)
			data[4 * 3 + lane] = ++n;

		sha256d_ms_4way(hash, data, midstate, prehash);

		for (lane = 0; lane < 4; lane++) {
			if (swab32(hash[4 * 7 + lane]) > Htarg)
				continue;
			/* Candidate: recompute the full hash and verify it. */
			pdata[19] = data[4 * 3 + lane];
			sha256d_80_swap(hash, pdata);
			if (fulltest(hash, ptarget)) {
				*hashes_done = n - first_nonce + 1;
				return 1;
			}
		}
	} while (n < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = n - first_nonce + 1;
	pdata[19] = n;
	return 0;
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SHA256_8WAY
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
const uint32_t *midstate, const uint32_t *prehash);
/*
 * Scan nonces eight at a time using sha256d_ms_8way().
 *
 * thr_id      - index into work_restart[] polled for an abort request
 * pdata       - work words; pdata[19] is the first nonce to try and receives
 *               the winning (or last tried) nonce
 * ptarget     - target; ptarget[7] is used as a quick pre-filter
 * max_nonce   - scanning stops once the nonce counter reaches this value
 * hashes_done - receives the number of nonces tried
 *
 * Returns 1 when a nonce passes fulltest(), 0 otherwise.
 */
static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata,
	const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t data[8 * 64] __attribute__((aligned(128)));
	uint32_t hash[8 * 8] __attribute__((aligned(32)));
	uint32_t midstate[8 * 8] __attribute__((aligned(32)));
	uint32_t prehash[8 * 8] __attribute__((aligned(32)));
	const uint32_t first_nonce = pdata[19];
	const uint32_t Htarg = ptarget[7];
	uint32_t n = first_nonce - 1;
	int word, lane;

	/* Scalar setup of the second block's schedule... */
	memcpy(data, pdata + 16, 64);
	sha256d_preextend(data);
	/* ...then spread every word over 8 lanes, highest word first so that
	 * no source word is overwritten before it has been read. */
	word = 32;
	while (word-- > 0) {
		const uint32_t v = data[word];

		for (lane = 0; lane < 8; lane++)
			data[word * 8 + lane] = v;
	}

	sha256_init(midstate);
	sha256_transform(midstate, pdata, 0);
	memcpy(prehash, midstate, 32);
	sha256d_prehash(prehash, pdata + 16);
	word = 8;
	while (word-- > 0) {
		const uint32_t m = midstate[word];
		const uint32_t p = prehash[word];

		for (lane = 0; lane < 8; lane++) {
			midstate[word * 8 + lane] = m;
			prehash[word * 8 + lane] = p;
		}
	}

	do {
		/* Word 3 of each lane carries that lane's nonce. */
		for (lane = 0; lane < 8; lane++)
			data[8 * 3 + lane] = ++n;

		sha256d_ms_8way(hash, data, midstate, prehash);

		for (lane = 0; lane < 8; lane++) {
			if (swab32(hash[8 * 7 + lane]) > Htarg)
				continue;
			/* Candidate: recompute the full hash and verify it. */
			pdata[19] = data[8 * 3 + lane];
			sha256d_80_swap(hash, pdata);
			if (fulltest(hash, ptarget)) {
				*hashes_done = n - first_nonce + 1;
				return 1;
			}
		}
	} while (n < max_nonce && !work_restart[thr_id].restart);

	*hashes_done = n - first_nonce + 1;
	pdata[19] = n;
	return 0;
}
#endif /* HAVE_SHA256_8WAY */
/*
 * Scan a nonce range for a double-SHA256 hash that meets the target.
 *
 * Dispatches to the 8-way or 4-way implementation when it is compiled in and
 * the run-time check says it can be used; otherwise tries one nonce per
 * iteration with sha256d_ms().
 *
 * thr_id      - index into work_restart[] polled for an abort request
 * pdata       - work words; pdata[19] is the first nonce to try and receives
 *               the winning (or last tried) nonce
 * ptarget     - target; ptarget[7] is used as a quick pre-filter
 * max_nonce   - scanning stops once the nonce counter reaches this value
 * hashes_done - receives the number of nonces tried
 *
 * Returns 1 when a nonce passes fulltest(), 0 otherwise.
 */
int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
	uint32_t max_nonce, unsigned long *hashes_done)
{
	uint32_t data[64] __attribute__((aligned(128)));
	uint32_t hash[8] __attribute__((aligned(32)));
	uint32_t midstate[8] __attribute__((aligned(32)));
	uint32_t prehash[8] __attribute__((aligned(32)));
	const uint32_t first_nonce = pdata[19];
	const uint32_t Htarg = ptarget[7];
	uint32_t n = first_nonce - 1;

#ifdef HAVE_SHA256_8WAY
	if (sha256_use_8way())
		return scanhash_sha256d_8way(thr_id, pdata, ptarget,
			max_nonce, hashes_done);
#endif
#ifdef HAVE_SHA256_4WAY
	if (sha256_use_4way())
		return scanhash_sha256d_4way(thr_id, pdata, ptarget,
			max_nonce, hashes_done);
#endif

	/* Nonce-independent work: partial schedule, midstate, rounds 0..2. */
	memcpy(data, pdata + 16, 64);
	sha256d_preextend(data);
	sha256_init(midstate);
	sha256_transform(midstate, pdata, 0);
	memcpy(prehash, midstate, 32);
	sha256d_prehash(prehash, pdata + 16);

	for (;;) {
		data[3] = ++n;
		sha256d_ms(hash, data, midstate, prehash);
		if (swab32(hash[7]) <= Htarg) {
			/* Candidate: recompute the full hash and verify it. */
			pdata[19] = data[3];
			sha256d_80_swap(hash, pdata);
			if (fulltest(hash, ptarget)) {
				*hashes_done = n - first_nonce + 1;
				return 1;
			}
		}
		if (n >= max_nonce || work_restart[thr_id].restart)
			break;
	}

	*hashes_done = n - first_nonce + 1;
	pdata[19] = n;
	return 0;
}
0707010000002D000081A4000003E800000064000000015EF4BCA100009AA3000000000000000000000000000000000000001600000000cpuminer-2.5.1/util.c/*
* Copyright 2010 Jeff Garzik
* Copyright 2012 Luke Dashjr
* Copyright 2012-2020 pooler
* Copyright 2017 Pieter Wuille
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#define _GNU_SOURCE
#include "cpuminer-config.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdarg.h>
#include <string.h>
#include <stdbool.h>
#include <inttypes.h>
#include <limits.h>
#include <errno.h>
#include <unistd.h>
#include <jansson.h>
#include <curl/curl.h>
#include <time.h>
#if defined(WIN32)
#include <winsock2.h>
#include <mstcpip.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#endif
#include "compat.h"
#include "miner.h"
#include "elist.h"
/* Growable receive buffer filled by all_data_cb(). */
struct data_buffer {
void *buf; /* heap block grown with realloc(); released by databuf_free() */
size_t len; /* bytes stored, not counting the NUL that all_data_cb() appends */
};
/* Read cursor over a request body, consumed by upload_data_cb(). */
struct upload_buffer {
const void *buf; /* start of the data to send (not freed by the callbacks) */
size_t len; /* total number of bytes in buf */
size_t pos; /* offset of the next byte to hand out; moved by seek_data_cb() */
};
/*
 * Response header values captured by resp_hdr_cb(). Each member is either
 * NULL or a heap-allocated, NUL-terminated string.
 */
struct header_info {
char *lp_path; /* value of the X-Long-Polling header */
char *reason; /* value of the X-Reject-Reason header */
char *stratum_url; /* value of the X-Stratum header */
};
/* One queued item; presumably an element of a struct thread_q (the queue code is not in this part of the file). */
struct tq_ent {
void *data; /* payload pointer carried by this entry */
struct list_head q_node; /* list linkage */
};
/*
 * Queue object of the kind passed to tq_push(). The exact semantics of the
 * members are defined by the queue functions, which are not in this part of
 * the file; the notes below are assumptions to confirm there.
 */
struct thread_q {
struct list_head q; /* list head, presumably of struct tq_ent entries */
bool frozen; /* presumably: queue no longer accepts entries - TODO confirm */
pthread_mutex_t mutex; /* presumably guards q and frozen */
pthread_cond_t cond; /* presumably signalled when the queue changes */
};
/*
 * printf-style logging.
 *
 * prio - syslog priority; only used on the syslog path
 * fmt  - printf format string followed by its arguments
 *
 * When syslog support is compiled in and use_syslog is set, the formatted
 * message goes to syslog().  Otherwise it is written to stderr, prefixed
 * with a "[YYYY-MM-DD HH:MM:SS] " local timestamp and followed by a newline.
 */
void applog(int prio, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);

#ifdef HAVE_SYSLOG_H
	if (use_syslog) {
		va_list probe;
		char *msg;
		int size;

		/* First pass only measures the message, second pass formats it. */
		va_copy(probe, args);
		size = vsnprintf(NULL, 0, fmt, probe) + 1;
		va_end(probe);

		msg = alloca(size);
		if (vsnprintf(msg, size, fmt, args) >= 0)
			syslog(prio, "%s", msg);
	}
#else
	if (0) {}
#endif
	else {
		struct tm stamp;
		time_t now;
		char *line_fmt;
		int size;

		time(&now);

		/* localtime() returns shared storage: copy it under the lock. */
		pthread_mutex_lock(&applog_lock);
		stamp = *localtime(&now);
		pthread_mutex_unlock(&applog_lock);

		/* Build a new format string: timestamp + caller's format + '\n'. */
		size = 40 + strlen(fmt) + 2;
		line_fmt = alloca(size);
		sprintf(line_fmt, "[%d-%02d-%02d %02d:%02d:%02d] %s\n",
			stamp.tm_year + 1900,
			stamp.tm_mon + 1,
			stamp.tm_mday,
			stamp.tm_hour,
			stamp.tm_min,
			stamp.tm_sec,
			fmt);

		pthread_mutex_lock(&applog_lock);
		vfprintf(stderr, line_fmt, args);	/* atomic write to stderr */
		fflush(stderr);
		pthread_mutex_unlock(&applog_lock);
	}

	va_end(args);
}
/* Modify the representation of integer numbers which would cause an overflow
 * so that they are treated as floating-point numbers.
 * This is a hack to overcome the limitations of some versions of Jansson.
 *
 * in - NUL-terminated JSON text.
 *
 * Returns a newly allocated copy of the text in which every integer literal
 * that does not fit the JSON integer type has ".0" appended, or NULL if the
 * allocation fails.  The caller owns the result and must free() it. */
static char *hack_json_numbers(const char *in)
{
	char *out;
	size_t i, off, intoff;
	bool in_str, in_int;

	/* Worst case is well below two output bytes per input byte. */
	out = calloc(2 * strlen(in) + 1, 1);
	if (!out)
		return NULL;

	off = intoff = 0;
	in_str = in_int = false;
	for (i = 0; in[i]; i++) {
		const char c = in[i];
		/* <ctype.h> functions are only defined for unsigned char values
		 * (and EOF); JSON text may contain bytes >= 0x80. */
		const bool is_digit = isdigit((unsigned char)c) != 0;

		if (c == '"') {
			in_str = !in_str;
		} else if (c == '\\') {
			/* Copy the backslash here; the escaped character is copied
			 * by the common code at the bottom of the loop. */
			out[off++] = c;
			if (!in[++i])
				break;
		} else if (!in_str && !in_int && is_digit) {
			/* Start of a number: remember where it begins in out. */
			intoff = off;
			in_int = true;
		} else if (in_int && !is_digit) {
			if (c != '.' && c != 'e' && c != 'E' && c != '+' && c != '-') {
				/* End of the number.  out is zero-filled, so the text
				 * at out + intoff is NUL-terminated right here. */
				in_int = false;
				if (off - intoff > 4) {
					char *end;
					bool too_big;

					errno = 0;
#if JSON_INTEGER_IS_LONG_LONG
					(void) strtoll(out + intoff, &end, 10);
					too_big = (errno == ERANGE);
#else
					{
						long l = strtol(out + intoff, &end, 10);
						too_big = (errno == ERANGE || l > INT_MAX);
					}
#endif
					/* !*end: the whole literal was a plain integer. */
					if (!*end && too_big) {
						out[off++] = '.';
						out[off++] = '0';
					}
				}
			}
		}
		out[off++] = in[i];
	}
	return out;
}
/*
 * Release the memory owned by a data_buffer and reset it to the empty state.
 * A NULL pointer is accepted and ignored.
 */
static void databuf_free(struct data_buffer *db)
{
	if (db != NULL) {
		free(db->buf);
		memset(db, 0, sizeof(struct data_buffer));
	}
}
/*
 * libcurl write callback: append the received bytes to a data_buffer.
 *
 * ptr       - received data (size * nmemb bytes)
 * user_data - the struct data_buffer to append to
 *
 * The buffer is kept NUL-terminated so it can be used as a C string; the
 * terminator is not counted in db->len.
 *
 * Returns the number of bytes consumed, or 0 if the buffer could not be
 * grown (the existing contents are left untouched in that case).
 */
static size_t all_data_cb(const void *ptr, size_t size, size_t nmemb,
			  void *user_data)
{
	struct data_buffer *db = user_data;
	const size_t len = size * nmemb;
	const size_t oldlen = db->len;
	const size_t newlen = oldlen + len;
	char *newmem;

	newmem = realloc(db->buf, newlen + 1);
	if (!newmem)
		return 0;

	/* Byte-wise addressing through char *: arithmetic on void * is a
	 * compiler extension, not standard C. */
	memcpy(newmem + oldlen, ptr, len);
	newmem[newlen] = '\0';	/* null terminate */

	db->buf = newmem;
	db->len = newlen;
	return len;
}
/*
 * libcurl read callback: hand out the next piece of the request body.
 *
 * ptr       - destination buffer with room for size * nmemb bytes
 * user_data - the struct upload_buffer being sent
 *
 * Returns the number of bytes copied into ptr; 0 signals the end of the data.
 */
static size_t upload_data_cb(void *ptr, size_t size, size_t nmemb,
			     void *user_data)
{
	struct upload_buffer *ub = user_data;
	size_t len = size * nmemb;
	size_t avail;

	/* Never let "len - pos" wrap around if pos has moved past the end. */
	avail = ub->pos < ub->len ? ub->len - ub->pos : 0;
	if (len > avail)
		len = avail;

	if (len) {
		memcpy(ptr, (const char *)ub->buf + ub->pos, len);
		ub->pos += len;
	}
	return len;
}
#if LIBCURL_VERSION_NUM >= 0x071200
/*
 * libcurl seek callback: reposition the read cursor of an upload_buffer.
 *
 * user_data - the struct upload_buffer being sent
 * offset    - displacement relative to origin
 * origin    - SEEK_SET, SEEK_CUR or SEEK_END
 *
 * Returns 0 on success, or 1 if origin is unknown or the requested position
 * lies outside [0, len]; the cursor is left unchanged on failure.
 */
static int seek_data_cb(void *user_data, curl_off_t offset, int origin)
{
	struct upload_buffer *ub = user_data;
	const curl_off_t end = (curl_off_t) ub->len;
	curl_off_t base;

	switch (origin) {
	case SEEK_SET:
		base = 0;
		break;
	case SEEK_CUR:
		base = (curl_off_t) ub->pos;
		break;
	case SEEK_END:
		base = end;
		break;
	default:
		return 1; /* CURL_SEEKFUNC_FAIL */
	}

	/* Range check written so that base + offset is only computed once it
	 * is known to be within [0, len] (no signed overflow, no wrapped pos). */
	if (offset < -base || offset > end - base)
		return 1; /* CURL_SEEKFUNC_FAIL */

	ub->pos = (size_t) (base + offset);
	return 0; /* CURL_SEEKFUNC_OK */
}
#endif
/*
 * libcurl header callback: capture the values of the X-Long-Polling,
 * X-Reject-Reason and X-Stratum response headers.
 *
 * ptr       - one header line (size * nmemb bytes, not NUL-terminated)
 * user_data - the struct header_info that receives the values
 *
 * Values are stored as heap-allocated strings with surrounding whitespace
 * removed; ownership passes to the header_info.  If a header is seen more
 * than once, the earlier value is freed and replaced.
 *
 * Always returns size * nmemb so that the transfer continues, even when the
 * line is ignored or memory could not be allocated.
 */
static size_t resp_hdr_cb(void *ptr, size_t size, size_t nmemb, void *user_data)
{
	struct header_info *hi = user_data;
	const char *line = ptr;
	const char *colon, *rem;
	size_t remlen, slen, vlen, ptrlen = size * nmemb;
	char *val = NULL, *key = NULL;

	/* Each buffer is large enough for any substring plus its terminator,
	 * because the ':' separator itself is never copied. */
	val = calloc(1, ptrlen);
	key = calloc(1, ptrlen);
	if (!key || !val)
		goto out;

	colon = memchr(line, ':', ptrlen);
	if (!colon || colon == line)	/* skip empty keys / blanks */
		goto out;
	slen = colon - line;
	if ((slen + 1) == ptrlen)	/* skip key w/ no value */
		goto out;

	memcpy(key, line, slen);	/* store & nul term key */
	key[slen] = 0;

	rem = line + slen + 1;		/* trim value's leading whitespace */
	remlen = ptrlen - slen - 1;
	while (remlen > 0 && isspace((unsigned char) *rem)) {
		remlen--;
		rem++;
	}

	memcpy(val, rem, remlen);	/* store value, trim trailing ws */
	val[remlen] = 0;
	vlen = strlen(val);
	while (vlen > 0 && isspace((unsigned char) val[vlen - 1]))
		val[--vlen] = 0;
	if (!vlen)			/* skip blank value */
		goto out;

	if (!strcasecmp("X-Long-Polling", key)) {
		free(hi->lp_path);	/* drop value of an earlier duplicate */
		hi->lp_path = val;	/* steal memory reference */
		val = NULL;
	} else if (!strcasecmp("X-Reject-Reason", key)) {
		free(hi->reason);
		hi->reason = val;	/* steal memory reference */
		val = NULL;
	} else if (!strcasecmp("X-Stratum", key)) {
		free(hi->stratum_url);
		hi->stratum_url = val;	/* steal memory reference */
		val = NULL;
	}

out:
	free(key);
	free(val);
	return ptrlen;
}
#if LIBCURL_VERSION_NUM >= 0x070f06
/*
 * libcurl socket-option callback: enable TCP keepalive on a new socket.
 *
 * fd is the socket being configured; userdata and purpose are not used.
 * Which options are set depends on the platform (generic SO_KEEPALIVE plus
 * Linux or Apple specific tuning, or SIO_KEEPALIVE_VALS on Windows).
 *
 * Returns 0 on success and 1 as soon as any option cannot be set.
 */
static int sockopt_keepalive_cb(void *userdata, curl_socket_t fd,
	curlsocktype purpose)
{
	int enable = 1;
	int probe_count = 3;
	int idle_secs = 50;
	int interval_secs = 50;

#ifndef WIN32
	if (unlikely(setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE,
				&enable, sizeof(enable))))
		return 1;
#ifdef __linux
	if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPCNT,
				&probe_count, sizeof(probe_count))))
		return 1;
	if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPIDLE,
				&idle_secs, sizeof(idle_secs))))
		return 1;
	if (unlikely(setsockopt(fd, SOL_TCP, TCP_KEEPINTVL,
				&interval_secs, sizeof(interval_secs))))
		return 1;
#endif /* __linux */
#ifdef __APPLE_CC__
	if (unlikely(setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE,
				&interval_secs, sizeof(interval_secs))))
		return 1;
#endif /* __APPLE_CC__ */
#else /* WIN32 */
	struct tcp_keepalive vals;
	DWORD outputBytes;

	/* The Windows API takes milliseconds. */
	vals.onoff = 1;
	vals.keepalivetime = idle_secs * 1000;
	vals.keepaliveinterval = interval_secs * 1000;
	if (unlikely(WSAIoctl(fd, SIO_KEEPALIVE_VALS, &vals, sizeof(vals),
			      NULL, 0, &outputBytes, NULL, NULL)))
		return 1;
#endif /* WIN32 */

	return 0;
}
#endif
/*
 * Send one JSON-RPC request (rpc_req) to url as an HTTP POST on the given
 * curl handle and return the decoded reply object.
 *
 * curl      handle assumed to be freshly [re]initialized; it is reset with
 *           curl_easy_reset() before this function returns.
 * userpass  credentials handed to CURLOPT_USERPWD (basic auth), or NULL.
 * curl_err  if non-NULL, receives the curl_easy_perform() result; it is
 *           forced to CURLE_OK for an HTTP 404 when JSON_RPC_QUIET_404 is set.
 * flags     JSON_RPC_LONGPOLL selects opt_timeout and the keepalive socket
 *           callback; JSON_RPC_QUIET_404 silences HTTP 404 failures.
 *
 * Side effects: an X-Stratum or X-Long-Polling response header may switch on
 * have_stratum / have_longpoll and push the URL/path onto the matching
 * thread queue, which then owns that string.
 *
 * Returns the parsed reply (not released here; the caller is expected to
 * json_decref() it), or NULL on transport, decode or JSON-RPC error.
 */
json_t *json_rpc_call(CURL *curl, const char *url,
		      const char *userpass, const char *rpc_req,
		      int *curl_err, int flags)
{
	json_t *val = NULL, *ret = NULL, *err_val, *res_val;
	int rc;
	long http_rc = 0;
	struct data_buffer all_data = {0};
	struct upload_buffer upload_data;
	char *json_buf;
	json_error_t err;
	struct curl_slist *headers = NULL;
	char len_hdr[64];
	char curl_err_str[CURL_ERROR_SIZE];
	long timeout = (flags & JSON_RPC_LONGPOLL) ? opt_timeout : 30;
	struct header_info hi = {0};

	/* make sure the error text is a valid string even if curl writes none */
	curl_err_str[0] = '\0';

	/* it is assumed that 'curl' is freshly [re]initialized at this pt */

	if (opt_protocol)
		curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
	curl_easy_setopt(curl, CURLOPT_URL, url);
	if (opt_cert)
		curl_easy_setopt(curl, CURLOPT_CAINFO, opt_cert);
	curl_easy_setopt(curl, CURLOPT_ENCODING, "");
	curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
	curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
	curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, all_data_cb);
	curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data);
	curl_easy_setopt(curl, CURLOPT_READFUNCTION, upload_data_cb);
	curl_easy_setopt(curl, CURLOPT_READDATA, &upload_data);
#if LIBCURL_VERSION_NUM >= 0x071200
	curl_easy_setopt(curl, CURLOPT_SEEKFUNCTION, &seek_data_cb);
	curl_easy_setopt(curl, CURLOPT_SEEKDATA, &upload_data);
#endif
	curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str);
	if (opt_redirect)
		curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
	curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
	curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, resp_hdr_cb);
	curl_easy_setopt(curl, CURLOPT_HEADERDATA, &hi);
	if (opt_proxy) {
		curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy);
		curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type);
	}
	if (userpass) {
		curl_easy_setopt(curl, CURLOPT_USERPWD, userpass);
		curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
	}
#if LIBCURL_VERSION_NUM >= 0x070f06
	if (flags & JSON_RPC_LONGPOLL)
		curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockopt_keepalive_cb);
#endif
	curl_easy_setopt(curl, CURLOPT_POST, 1);

	if (opt_protocol)
		applog(LOG_DEBUG, "JSON protocol request:\n%s\n", rpc_req);

	upload_data.buf = rpc_req;
	upload_data.len = strlen(rpc_req);
	upload_data.pos = 0;
	sprintf(len_hdr, "Content-Length: %lu",
		(unsigned long) upload_data.len);

	headers = curl_slist_append(headers, "Content-Type: application/json");
	headers = curl_slist_append(headers, len_hdr);
	headers = curl_slist_append(headers, "User-Agent: " USER_AGENT);
	headers = curl_slist_append(headers, "X-Mining-Extensions: midstate");
	headers = curl_slist_append(headers, "Accept:"); /* disable Accept hdr*/
	headers = curl_slist_append(headers, "Expect:"); /* disable Expect hdr*/

	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

	rc = curl_easy_perform(curl);
	if (curl_err != NULL)
		*curl_err = rc;
	if (rc) {
		curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_rc);
		if (!((flags & JSON_RPC_LONGPOLL) && rc == CURLE_OPERATION_TIMEDOUT) &&
		    !((flags & JSON_RPC_QUIET_404) && http_rc == 404))
			applog(LOG_ERR, "HTTP request failed: %s", curl_err_str);
		if (curl_err && (flags & JSON_RPC_QUIET_404) && http_rc == 404)
			*curl_err = CURLE_OK;
		goto out;
	}

	/* If X-Stratum was found, activate Stratum */
	if (want_stratum && hi.stratum_url &&
	    !strncasecmp(hi.stratum_url, "stratum+tcp://", 14)) {
		have_stratum = true;
		tq_push(thr_info[stratum_thr_id].q, hi.stratum_url);
		hi.stratum_url = NULL;	/* now owned by the queue */
	}

	/* If X-Long-Polling was found, activate long polling */
	if (!have_longpoll && want_longpoll && hi.lp_path && !have_gbt &&
	    allow_getwork && !have_stratum) {
		have_longpoll = true;
		tq_push(thr_info[longpoll_thr_id].q, hi.lp_path);
		hi.lp_path = NULL;	/* now owned by the queue */
	}

	if (!all_data.buf) {
		applog(LOG_ERR, "Empty data received in json_rpc_call.");
		goto out;
	}

	json_buf = hack_json_numbers(all_data.buf);
	errno = 0; /* needed for Jansson < 2.1 */
	val = JSON_LOADS(json_buf, &err);
	free(json_buf);
	if (!val) {
		applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text);
		goto out;
	}

	if (opt_protocol) {
		char *s = json_dumps(val, JSON_INDENT(3));
		applog(LOG_DEBUG, "JSON protocol response:\n%s", s);
		free(s);
	}

	/* JSON-RPC valid response returns a 'result' and a null 'error'. */
	res_val = json_object_get(val, "result");
	err_val = json_object_get(val, "error");

	if (!res_val || (err_val && !json_is_null(err_val))) {
		char *s;

		if (err_val)
			s = json_dumps(err_val, JSON_INDENT(3));
		else
			s = strdup("(unknown reason)");

		applog(LOG_ERR, "JSON-RPC call failed: %s",
		       s ? s : "(unknown reason)");

		free(s);
		goto out;
	}

	if (hi.reason)
		json_object_set_new(val, "reject-reason", json_string(hi.reason));

	/* success: hand the reply to the caller instead of releasing it */
	ret = val;
	val = NULL;

out:
	/*
	 * Common cleanup for success and failure.  Header strings that were
	 * handed to a thread queue were set to NULL above, so only the ones
	 * still owned here are freed; a decoded reply that is not returned
	 * is released.
	 */
	if (val)
		json_decref(val);
	free(hi.lp_path);
	free(hi.reason);
	free(hi.stratum_url);
	databuf_free(&all_data);
	curl_slist_free_all(headers);
	curl_easy_reset(curl);
	return ret;
}
/*
 * Reverse the order of the len bytes starting at p, in place.
 */
void memrev(unsigned char *p, size_t len)
{
	unsigned char c, *q;

	/*
	 * Fewer than two bytes: nothing to swap.  Returning early also avoids
	 * forming the out-of-range pointer p - 1 when len is 0.
	 */
	if (len < 2)
		return;

	for (q = p + len - 1; p < q; p++, q--) {
		c = *p;
		*p = *q;
		*q = c;
	}
}
/*
 * Write the lowercase hexadecimal form of the len bytes at p into s.
 *
 * s must have room for 2 * len + 1 characters; the result is
 * NUL-terminated.  When len is 0 nothing is written to s at all.
 */
void bin2hex(char *s, const unsigned char *p, size_t len)
{
	static const char digits[] = "0123456789abcdef";
	size_t i;	/* size_t, so the index cannot overflow before len */

	for (i = 0; i < len; i++) {
		s[2 * i] = digits[p[i] >> 4];
		s[2 * i + 1] = digits[p[i] & 0x0f];
	}
	if (len)
		s[2 * len] = '\0';
}
/*
 * Return a newly malloc()ed, NUL-terminated hexadecimal string for the
 * len bytes at p, or NULL if the buffer cannot be allocated.  The caller
 * frees the result.
 */
char *abin2hex(const unsigned char *p, size_t len)
{
	char *s;

	/* 2 * len + 1 must not wrap around */
	if (len > (SIZE_MAX - 1) / 2)
		return NULL;

	s = malloc((len * 2) + 1);
	if (!s)
		return NULL;
	/*
	 * Terminate up front: for len == 0 the conversion loop writes
	 * nothing, which would otherwise leave the buffer uninitialized.
	 */
	s[0] = '\0';
	bin2hex(s, p, len);
	return s;
}
/* Value of the hexadecimal digit c (0..15), or -1 if c is not one. */
static int hex2bin_nibble(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Decode the hexadecimal string hexstr into exactly len bytes at p.
 *
 * Returns true only if hexstr consists of exactly 2 * len hex digits.
 * A truncated pair or a non-hex character is logged and yields false;
 * a string that is too short or too long yields false without a log.
 *
 * Each pair is decoded digit by digit rather than with strtol(), because
 * strtol() also accepts leading whitespace and a sign, so pairs such as
 * "+f" or " 1" would be taken as valid hex.
 */
bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
{
	while (*hexstr && len) {
		int hi, lo;

		if (!hexstr[1]) {
			applog(LOG_ERR, "hex2bin str truncated");
			return false;
		}
		hi = hex2bin_nibble(hexstr[0]);
		lo = hex2bin_nibble(hexstr[1]);
		if (hi < 0 || lo < 0) {
			char hex_byte[3];

			hex_byte[0] = hexstr[0];
			hex_byte[1] = hexstr[1];
			hex_byte[2] = '\0';
			applog(LOG_ERR, "hex2bin failed on '%s'", hex_byte);
			return false;
		}
		*p++ = (unsigned char)((hi << 4) | lo);
		hexstr += 2;
		len--;
	}

	return (len == 0 && *hexstr == 0) ? true : false;
}
/*
 * Encode n as a variable-length integer into p and return the number of
 * bytes written (1, 3, 5 or 9).
 *
 * Values below 0xfd are stored as a single byte.  Larger values get a
 * marker byte (0xfd, 0xfe or 0xff) followed by the value in 2, 4 or 8
 * bytes, least significant byte first.
 */
int varint_encode(unsigned char *p, uint64_t n)
{
	unsigned char marker;
	int payload, k;

	if (n < 0xfd) {
		p[0] = n;
		return 1;
	}

	if (n <= 0xffff) {
		marker = 0xfd;
		payload = 2;
	} else if (n <= 0xffffffff) {
		marker = 0xfe;
		payload = 4;
	} else {
		marker = 0xff;
		payload = 8;
	}

	p[0] = marker;
	for (k = 1; k <= payload; k++) {
		p[k] = n & 0xff;
		n >>= 8;
	}
	return payload + 1;
}
/* Base58 alphabet; a character's index in this string is its digit value. */
static const char b58digits[] = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";

/*
 * Decode the base58 string b58 into exactly binsz bytes at bin, most
 * significant byte first, zero-padded on the left.
 *
 * Returns false if b58 contains a character outside the alphabet, if the
 * value does not fit in binsz bytes, or if the scratch buffer cannot be
 * allocated; bin is only written on success.
 */
static bool b58dec(unsigned char *bin, size_t binsz, const char *b58)
{
	size_t i, j;
	uint64_t t;
	uint32_t c;
	uint32_t *outi;
	size_t outisz = (binsz + 3) / 4;
	int rem = binsz % 4;
	/*
	 * Bits of the top word that lie outside the binsz-byte result.  When
	 * binsz is a multiple of 4 the whole top word is usable, so nothing
	 * is masked (this also covers binsz == 0, where there is no word).
	 */
	uint32_t remmask = rem ? 0xffffffff << (8 * rem) : 0;
	size_t b58sz = strlen(b58);
	bool rc = false;

	/* allocate at least one word so a NULL return always means failure */
	outi = calloc(outisz ? outisz : 1, sizeof(*outi));
	if (!outi)
		return false;

	for (i = 0; i < b58sz; ++i) {
		/* look up the digit value; the NUL terminator ends the search */
		for (c = 0; b58digits[c] != b58[i]; c++)
			if (!b58digits[c])
				goto out;
		/* outi = outi * 58 + digit, carrying from the low word up */
		for (j = outisz; j--; ) {
			t = (uint64_t)outi[j] * 58 + c;
			c = t >> 32;
			outi[j] = t & 0xffffffff;
		}
		/* overflow past the top word, or into its unused bytes */
		if (c || (remmask && (outi[0] & remmask)))
			goto out;
	}

	j = 0;
	switch (rem) {
	case 3:
		*(bin++) = (outi[0] >> 16) & 0xff;
		/* fall through */
	case 2:
		*(bin++) = (outi[0] >> 8) & 0xff;
		/* fall through */
	case 1:
		*(bin++) = outi[0] & 0xff;
		++j;
		break;
	default:
		break;
	}
	/*
	 * Remaining words are stored big-endian, byte by byte (the byte order
	 * indicated by the name of the be32enc() helper this replaces).  bin
	 * is generally not 4-byte aligned here, so it is not accessed through
	 * a uint32_t pointer.
	 */
	for (; j < outisz; ++j) {
		bin[0] = (outi[j] >> 24) & 0xff;
		bin[1] = (outi[j] >> 16) & 0xff;
		bin[2] = (outi[j] >> 8) & 0xff;
		bin[3] = outi[j] & 0xff;
		bin += sizeof(uint32_t);
	}

	rc = true;
out:
	free(outi);
	return rc;
}
/*
 * Validate a decoded base58check payload.
 *
 * bin/binsz is the decoded data whose last four bytes are a checksum;
 * b58 is the string it was decoded from.
 *
 * Returns the first payload byte (bin[0]) on success, -1 if the trailing
 * four bytes do not match the sha256d() of the preceding bytes, or -3 if
 * the count of leading zero bytes differs from the count of leading '1'
 * characters in b58.
 */
static int b58check(unsigned char *bin, size_t binsz, const char *b58)
{
	unsigned char digest[32];
	int zeros = 0;

	sha256d(digest, bin, binsz - 4);
	if (memcmp(&bin[binsz - 4], digest, 4) != 0)
		return -1;

	/* Check number of zeros is correct AFTER verifying checksum
	 * (to avoid possibility of accessing the string beyond the end) */
	while (bin[zeros] == '\0' && b58[zeros] == '1')
		zeros++;
	if (bin[zeros] == '\0' || b58[zeros] == '1')
		return -3;

	return bin[0];
}
/*
 * One step of the bech32 checksum: shift the low 25 bits of pre left by
 * five and XOR in one generator constant for every set bit among bits
 * 25..29 of pre.
 */
static uint32_t bech32_polymod_step(uint32_t pre) {
	static const uint32_t generator[5] = {
		0x3b6a57b2UL, 0x26508e6dUL, 0x1ea119faUL, 0x3d4233ddUL, 0x2a1462b3UL
	};
	const uint8_t top = pre >> 25;
	uint32_t next = (pre & 0x1FFFFFF) << 5;
	int bit;

	for (bit = 0; bit < 5; bit++) {
		if ((top >> bit) & 1)
			next ^= generator[bit];
	}
	return next;
}
/*
 * Reverse lookup table for the bech32 character set, indexed by 7-bit
 * character code.  Each entry is the value (0..31) of that character, or
 * -1 if the character is not part of the set.  The rows for upper-case
 * and lower-case letters are identical, so both cases map to the same
 * value.
 */
static const int8_t bech32_charset_rev[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1,
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1,
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
};
/*
 * Decode a bech32 string.
 *
 * hrp       receives the human-readable part, lower-cased and
 *           NUL-terminated (everything before the last '1' in input).
 * data      receives the values of the characters after the separator,
 *           excluding the final six checksum characters.
 * data_len  receives the number of values stored in data.
 * input     the string to decode; its length must be 8..90.
 *
 * Returns true only if every character is valid, upper and lower case
 * are not mixed, and the checksum comes out to 1.
 */
static bool bech32_decode(char *hrp, uint8_t *data, size_t *data_len, const char *input) {
	const size_t total = strlen(input);
	uint32_t chk = 1;
	size_t pos, hrp_len;
	bool saw_lower = false, saw_upper = false;

	if (total < 8 || total > 90)
		return false;

	/* count the characters that follow the last '1' separator */
	*data_len = 0;
	while (*data_len < total && input[(total - 1) - *data_len] != '1')
		++(*data_len);
	hrp_len = total - (1 + *data_len);
	if (1 + *data_len >= total || *data_len < 6)
		return false;
	*data_len -= 6;	/* the checksum is not part of the payload */

	/* first pass over the prefix: validate, lower-case, feed high bits */
	for (pos = 0; pos < hrp_len; ++pos) {
		int ch = input[pos];

		if (ch < 33 || ch > 126)
			return false;
		if (ch >= 'a' && ch <= 'z') {
			saw_lower = true;
		} else if (ch >= 'A' && ch <= 'Z') {
			saw_upper = true;
			ch = (ch - 'A') + 'a';
		}
		hrp[pos] = ch;
		chk = bech32_polymod_step(chk) ^ (ch >> 5);
	}
	hrp[pos] = 0;

	/* second pass over the prefix: feed the low five bits */
	chk = bech32_polymod_step(chk);
	for (pos = 0; pos < hrp_len; ++pos)
		chk = bech32_polymod_step(chk) ^ (input[pos] & 0x1f);

	/* data and checksum characters, starting just past the separator */
	for (pos = hrp_len + 1; pos < total; ++pos) {
		const char c = input[pos];
		int v = (c & 0x80) ? -1 : bech32_charset_rev[(int)c];

		if (c >= 'a' && c <= 'z')
			saw_lower = true;
		if (c >= 'A' && c <= 'Z')
			saw_upper = true;
		if (v == -1)
			return false;
		chk = bech32_polymod_step(chk) ^ v;
		if (pos + 6 < total)
			data[pos - (1 + hrp_len)] = v;
	}

	if (saw_lower && saw_upper)
		return false;
	return chk == 1;
}
static bool convert_bits(uint8_t *out, size_t *outlen, int outbits, const uint8_t *in, size_t inlen, int inbits, int pad) {
uint32_t val = 0;
int bits = 0;
uint32_t maxv = (((uint32_t)1) << outbits) - 1;
while (inlen--) {
val = (val << inbits) | *(in++);
bits += inbits;
while (bits >= outbits) {
bits -= outbits;
out[(*outlen)++] = (val >> bits) & maxv;
}
}
if (pad) {
if (bits) {
out[(*outlen)++] = (val << (outbits - bits)) & maxv;
}
} else if (((val << (outbits - bits)) & maxv) || bits >= inbits) {
return false;
}
return true;
}
/*
 * Decode a bech32 address into a witness version and witness program.
 *
 * witver       receives the first decoded value (0..16).
 * witdata      receives the program bytes (up to 40 are accepted).
 * witdata_len  receives the program length.
 *
 * Returns false if the string does not decode, the version exceeds 16,
 * the program is shorter than 2 or longer than 40 bytes, or a version 0
 * program is not 20 or 32 bytes long.  The human-readable part is
 * decoded but not compared against anything here.
 */
static bool segwit_addr_decode(int *witver, uint8_t *witdata, size_t *witdata_len, const char *addr) {
	uint8_t values[84];
	char hrp[84];
	size_t nvalues;

	if (!bech32_decode(hrp, values, &nvalues, addr))
		return false;
	if (nvalues == 0 || nvalues > 65)
		return false;
	if (values[0] > 16)
		return false;

	/* everything after the version value is the program, 5 bits each */
	*witdata_len = 0;
	if (!convert_bits(witdata, witdata_len, 8, values + 1, nvalues - 1, 5, 0))
		return false;
	if (*witdata_len < 2 || *witdata_len > 40)
		return false;
	if (values[0] == 0 && *witdata_len != 20 && *witdata_len != 32)
		return false;

	*witver = values[0];
	return true;
}
/*
 * Build the output script for a bech32 address: one version opcode
 * (0 for version 0, otherwise 0x50 + version), one length byte, then
 * the witness program.
 *
 * Returns the script length, or 0 if the address does not decode or
 * outsz is too small to hold the script.
 */
static size_t bech32_to_script(uint8_t *out, size_t outsz, const char *addr) {
	uint8_t program[40];
	size_t program_len, script_len;
	int version;

	if (!segwit_addr_decode(&version, program, &program_len, addr))
		return 0;

	script_len = program_len + 2;
	if (outsz < script_len)
		return 0;

	out[0] = (version == 0) ? 0 : (0x50 + version);
	out[1] = program_len;
	memcpy(&out[2], program, program_len);
	return script_len;
}
/*
 * Convert an address string into an output script written to out.
 *
 * The string is first tried as base58; if it does not decode to 25
 * bytes it is handed to the bech32 path instead.  A base58 address with
 * version byte 5 or 196 yields a 23-byte script-hash script; any other
 * version yields a 25-byte pubkey-hash script.
 *
 * Returns 0 if the base58 checksum check fails.  For base58 addresses
 * the required script size is returned even when outsz is too small, in
 * which case out is left untouched.
 */
size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
{
	unsigned char decoded[25];
	int version;

	if (!b58dec(decoded, sizeof(decoded), addr))
		return bech32_to_script(out, outsz, addr);

	version = b58check(decoded, sizeof(decoded), addr);
	if (version < 0)
		return 0;

	if (version == 5 ||	/* Bitcoin script hash */
	    version == 196) {	/* Testnet script hash */
		if (outsz < 23)
			return 23;
		out[0] = 0xa9;	/* OP_HASH160 */
		out[1] = 0x14;	/* push 20 bytes */
		memcpy(&out[2], &decoded[1], 20);
		out[22] = 0x87;	/* OP_EQUAL */
		return 23;
	}

	if (outsz < 25)
		return 25;
	out[0] = 0x76;	/* OP_DUP */
	out[1] = 0xa9;	/* OP_HASH160 */
	out[2] = 0x14;	/* push 20 bytes */
	memcpy(&out[3], &decoded[1], 20);
	out[23] = 0x88;	/* OP_EQUALVERIFY */
	out[24] = 0xac;	/* OP_CHECKSIG */
	return 25;
}
/*
 * Compute *x - *y and store the difference in *result.
 *
 * Note that *y is modified: its fields are adjusted so that the
 * microsecond subtraction needs no borrow.
 *
 * Returns 1 if the difference is negative, otherwise 0.
 */
int timeval_subtract(struct timeval *result, struct timeval *x,
	struct timeval *y)
{
	enum { USEC_PER_SEC = 1000000 };

	/* y has more microseconds than x: move whole seconds into y->tv_sec */
	if (x->tv_usec < y->tv_usec) {
		int carry = (y->tv_usec - x->tv_usec) / USEC_PER_SEC + 1;

		y->tv_usec -= USEC_PER_SEC * carry;
		y->tv_sec += carry;
	}
	/* the gap exceeds a second: move whole seconds back out of y->tv_sec */
	if (x->tv_usec - y->tv_usec > USEC_PER_SEC) {
		int borrow = (x->tv_usec - y->tv_usec) / USEC_PER_SEC;

		y->tv_usec += USEC_PER_SEC * borrow;
		y->tv_sec -= borrow;
	}

	/* after the adjustments above the microsecond part is non-negative */
	result->tv_usec = x->tv_usec - y->tv_usec;
	result->tv_sec = x->tv_sec - y->tv_sec;

	return x->tv_sec < y->tv_sec;
}
/*
 * Compare a hash against a target, both given as eight 32-bit words with
 * word 7 the most significant.
 *
 * Returns true if hash <= target.  With opt_debug set, both values are
 * also logged as hexadecimal strings, most significant word first.
 */
bool fulltest(const uint32_t *hash, const uint32_t *target)
{
	bool rc = true;	/* all words equal counts as a pass */
	int w = 7;

	/* the first differing word, from the top down, decides */
	while (w >= 0) {
		if (hash[w] != target[w]) {
			rc = hash[w] < target[w];
			break;
		}
		w--;
	}

	if (opt_debug) {
		uint32_t hash_be[8], target_be[8];
		char hash_str[65], target_str[65];
		int k;

		for (k = 0; k < 8; k++) {
			be32enc(hash_be + k, hash[7 - k]);
			be32enc(target_be + k, target[7 - k]);
		}
		bin2hex(hash_str, (unsigned char *)hash_be, 32);
		bin2hex(target_str, (unsigned char *)target_be, 32);

		applog(LOG_DEBUG, "DEBUG: %s\nHash: %s\nTarget: %s",
			rc ? "hash <= target"
			   : "hash > target (false positive)",
			hash_str,
			target_str);
	}

	return rc;
}
/*
 * Fill the eight-word target (word 7 most significant) for a difficulty.
 *
 * Starting at word 6, diff is divided by 2^32 once per word stepped down
 * while it exceeds 1.0.  The quotient 4294901760.0 / diff is then stored
 * as a 64-bit value across that word and the one above it; all other
 * words are zero.
 */
void diff_to_target(uint32_t *target, double diff)
{
	uint64_t mant;
	int word = 6;

	while (word > 0 && diff > 1.0) {
		diff /= 4294967296.0;
		word--;
	}

	mant = 4294901760.0 / diff;
	if (mant == 0 && word == 6) {
		memset(target, 0xff, 32);
		return;
	}

	memset(target, 0, 32);
	target[word] = (uint32_t)mant;
	target[word + 1] = (uint32_t)(mant >> 32);
}
/*
 * socket_blocks(): true when the most recent socket error only means the
 * call would have blocked -- WSAEWOULDBLOCK from WSAGetLastError() on
 * WIN32, EAGAIN or EWOULDBLOCK in errno elsewhere.
 */
#ifdef WIN32
#define socket_blocks() (WSAGetLastError() == WSAEWOULDBLOCK)
#else
#define socket_blocks() (errno == EAGAIN || errno == EWOULDBLOCK)
#endif
/*
 * Send the string s, followed by a newline, over the stratum connection.
 *
 * The terminating NUL of s is overwritten with '\n', so s must be a
 * writable buffer and is no longer NUL-terminated afterwards.
 *
 * Before every send attempt the socket is polled for writability with a
 * zero timeout; if it is not writable right away the function gives up.
 * Returns true once every byte has been sent, false otherwise.
 */
static bool send_line(struct stratum_ctx *sctx, char *s)
{
	ssize_t remaining, done = 0;

	remaining = strlen(s);
	s[remaining++] = '\n';

	while (remaining > 0) {
		struct timeval no_wait = {0, 0};
		ssize_t n;
		fd_set wfds;

		FD_ZERO(&wfds);
		FD_SET(sctx->sock, &wfds);
		if (select(sctx->sock + 1, NULL, &wfds, NULL, &no_wait) < 1)
			return false;

#if LIBCURL_VERSION_NUM >= 0x071202
		CURLcode rc = curl_easy_send(sctx->curl, s + done, remaining, (size_t *)&n);
		if (rc != CURLE_OK) {
			/* anything but "try again" is fatal */
			if (rc != CURLE_AGAIN)
				return false;
			n = 0;
		}
#else
		n = send(sctx->sock, s + done, remaining, 0);
		if (n < 0) {
			/* anything but "would block" is fatal */
			if (!socket_blocks())
				return false;
			n = 0;
		}
#endif
		done += n;
		remaining -= n;
	}

	return true;
}
/*
 * Send one line to the stratum server while holding sctx->sock_lock.
 *
 * With opt_protocol set the line is logged first.  s is passed on to
 * send_line() as a writable buffer.  Returns send_line()'s result.
 */
bool stratum_send_line(struct stratum_ctx *sctx, char *s)
{
	bool ok;

	if (opt_protocol)
		applog(LOG_DEBUG, "> %s", s);

	pthread_mutex_lock(&sctx->sock_lock);
	ok = send_line(sctx, s);
	pthread_mutex_unlock(&sctx->sock_lock);

	return ok;
}
/*
 * Wait up to timeout seconds for sock to become readable.
 * Returns true if select() reports it readable, false on timeout or error.
 */
static bool socket_full(curl_socket_t sock, int timeout)
{
	fd_set rfds;
	struct timeval wait;

	wait.tv_sec = timeout;
	wait.tv_usec = 0;
	FD_ZERO(&rfds);
	FD_SET(sock, &rfds);

	return select(sock + 1, &rfds, NULL, NULL, &wait) > 0;
}
/*
 * Report whether stratum input is available: either data already sits in
 * the receive buffer, or the socket becomes readable within timeout
 * seconds.
 */
bool stratum_socket_full(struct stratum_ctx *sctx, int timeout)
{
	/* a non-empty buffer answers the question without touching the socket */
	if (sctx->sockbuf[0] != '\0')
		return true;
	return socket_full(sctx->sock, timeout);
}
/*
 * RBUFSIZE is the size of the per-read scratch buffer, the initial size
 * of the stratum receive buffer, and the step to which that buffer's size
 * is rounded when it grows.  RECVSIZE is the most requested from a single
 * receive call; it is smaller than RBUFSIZE, so the zero-filled scratch
 * buffer always stays NUL-terminated.
 */
#define RBUFSIZE 2048
#define RECVSIZE (RBUFSIZE - 4)
static void stratum_buffer_append(struct stratum_ctx *sctx, const char *s)
{
size_t old, new;
old = strlen(sctx->sockbuf);
new = old + strlen(s) + 1;
if (new >= sctx->sockbuf_size) {
sctx->sockbuf_size = new + (RBUFSIZE - (new % RBUFSIZE));
sctx->sockbuf = realloc(sctx->sockbuf, sctx->sockbuf_size);
}
strcpy(sctx->sockbuf + old, s);
}
/*
 * Return the next line received from the stratum server as a newly
 * allocated string without its newline, or NULL on failure.  The caller
 * frees the result.
 *
 * If the receive buffer holds no newline yet, the socket is read for up
 * to 60 seconds until one arrives.  A connection closed by the peer or a
 * receive error yields NULL.  If the 60 seconds run out with only a
 * partial line buffered, that partial text is returned as the line.
 *
 * The line is cut out of the buffer with strspn()/strcspn() instead of
 * strtok(): strtok() keeps hidden static state, which is unsafe in a
 * threaded program.  Leading newlines are skipped, as strtok() did.
 */
char *stratum_recv_line(struct stratum_ctx *sctx)
{
	size_t len;
	char *tok, *rest, *sret = NULL;

	if (!strstr(sctx->sockbuf, "\n")) {
		bool ret = true;
		time_t rstart;

		time(&rstart);
		if (!socket_full(sctx->sock, 60)) {
			applog(LOG_ERR, "stratum_recv_line timed out");
			goto out;
		}
		do {
			char s[RBUFSIZE];
			ssize_t n;

			/* zero-fill so the received bytes are NUL-terminated */
			memset(s, 0, RBUFSIZE);
#if LIBCURL_VERSION_NUM >= 0x071202
			CURLcode rc = curl_easy_recv(sctx->curl, s, RECVSIZE, (size_t *)&n);
			if (rc == CURLE_OK && !n) {
				/* zero bytes without error: peer closed */
				ret = false;
				break;
			}
			if (rc != CURLE_OK) {
				if (rc != CURLE_AGAIN || !socket_full(sctx->sock, 1)) {
					ret = false;
					break;
				}
			} else
				stratum_buffer_append(sctx, s);
#else
			n = recv(sctx->sock, s, RECVSIZE, 0);
			if (!n) {
				/* zero bytes: peer closed */
				ret = false;
				break;
			}
			if (n < 0) {
				if (!socket_blocks() || !socket_full(sctx->sock, 1)) {
					ret = false;
					break;
				}
			} else
				stratum_buffer_append(sctx, s);
#endif
		} while (time(NULL) - rstart < 60 && !strstr(sctx->sockbuf, "\n"));

		if (!ret) {
			applog(LOG_ERR, "stratum_recv_line failed");
			goto out;
		}
	}

	/* skip leading newlines, then measure up to the next one */
	tok = sctx->sockbuf + strspn(sctx->sockbuf, "\n");
	if (!*tok) {
		applog(LOG_ERR, "stratum_recv_line failed to parse a newline-terminated string");
		goto out;
	}
	len = strcspn(tok, "\n");

	/* copy the line out before touching the buffer */
	sret = malloc(len + 1);
	if (!sret) {
		applog(LOG_ERR, "stratum_recv_line: out of memory");
		goto out;
	}
	memcpy(sret, tok, len);
	sret[len] = '\0';

	/*
	 * Drop the consumed line and its newline.  The remainder is located
	 * relative to tok, so newlines skipped at the front cannot shift the
	 * offset, and exactly the remaining text plus its NUL is moved.
	 */
	rest = tok + len;
	if (*rest == '\n')
		rest++;
	memmove(sctx->sockbuf, rest, strlen(rest) + 1);

out:
	if (sret && opt_protocol)
		applog(LOG_DEBUG, "< %s", sret);
	return sret;
}
#if LIBCURL_VERSION_NUM >= 0x071101 && LIBCURL_VERSION_NUM < 0x072d00
/*
 * Socket-opening callback for curl: creates the socket itself and also
 * stores the descriptor in the curl_socket_t that clientp points to.
 * The purpose argument is not used.
 */
static curl_socket_t opensocket_grab_cb(void *clientp, curlsocktype purpose,
	struct curl_sockaddr *addr)
{
	curl_socket_t *slot = clientp;
	curl_socket_t fd = socket(addr->family, addr->socktype, addr->protocol);

	*slot = fd;
	return fd;
}
#endif
/*
 * Open a connection to the stratum server at url.
 *
 * Any existing curl handle in sctx is cleaned up and replaced, the
 * receive buffer is created on first use and emptied, and sctx->url and
 * sctx->curl_url are updated.  The connection is made with
 * CURLOPT_CONNECT_ONLY and the resulting socket is stored in sctx->sock.
 *
 * url must be at least 11 characters long, because the curl URL is built
 * as "http" followed by url + 11; shorter strings are rejected.
 *
 * Returns true on success.  On failure an error is logged, the new curl
 * handle is cleaned up and sctx->curl is left NULL.
 */
bool stratum_connect(struct stratum_ctx *sctx, const char *url)
{
	CURL *curl;
	int rc;

	pthread_mutex_lock(&sctx->sock_lock);
	if (sctx->curl)
		curl_easy_cleanup(sctx->curl);
	sctx->curl = curl_easy_init();
	if (!sctx->curl) {
		applog(LOG_ERR, "CURL initialization failed");
		pthread_mutex_unlock(&sctx->sock_lock);
		return false;
	}
	curl = sctx->curl;
	if (!sctx->sockbuf) {
		sctx->sockbuf = calloc(RBUFSIZE, 1);
		if (!sctx->sockbuf) {
			applog(LOG_ERR, "Stratum connection failed: out of memory");
			curl_easy_cleanup(curl);
			sctx->curl = NULL;
			pthread_mutex_unlock(&sctx->sock_lock);
			return false;
		}
		sctx->sockbuf_size = RBUFSIZE;
	}
	sctx->sockbuf[0] = '\0';
	pthread_mutex_unlock(&sctx->sock_lock);

	/* url + 11 below must stay inside the string */
	if (strlen(url) < 11) {
		applog(LOG_ERR, "Stratum connection failed: invalid URL");
		goto fail;
	}

	if (url != sctx->url) {
		/* duplicate first, so a failed strdup leaves the old URL intact */
		char *copy = strdup(url);

		if (!copy) {
			applog(LOG_ERR, "Stratum connection failed: out of memory");
			goto fail;
		}
		free(sctx->url);
		sctx->url = copy;
	}
	free(sctx->curl_url);
	/* "http" + (url + 11) + NUL needs strlen(url) - 6 bytes */
	sctx->curl_url = malloc(strlen(url));
	if (!sctx->curl_url) {
		applog(LOG_ERR, "Stratum connection failed: out of memory");
		goto fail;
	}
	sprintf(sctx->curl_url, "http%s", url + 11);

	if (opt_protocol)
		curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
	curl_easy_setopt(curl, CURLOPT_URL, sctx->curl_url);
	if (opt_cert)
		curl_easy_setopt(curl, CURLOPT_CAINFO, opt_cert);
	curl_easy_setopt(curl, CURLOPT_FRESH_CONNECT, 1);
	curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 30);
	curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, sctx->curl_err_str);
	curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
	curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
	if (opt_proxy) {
		curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy);
		curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type);
	}
	curl_easy_setopt(curl, CURLOPT_HTTPPROXYTUNNEL, 1);
#if LIBCURL_VERSION_NUM >= 0x070f06
	curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockopt_keepalive_cb);
#endif
#if LIBCURL_VERSION_NUM >= 0x071101 && LIBCURL_VERSION_NUM < 0x072d00
	curl_easy_setopt(curl, CURLOPT_OPENSOCKETFUNCTION, opensocket_grab_cb);
	curl_easy_setopt(curl, CURLOPT_OPENSOCKETDATA, &sctx->sock);
#endif
	curl_easy_setopt(curl, CURLOPT_CONNECT_ONLY, 1);

	rc = curl_easy_perform(curl);
	if (rc) {
		applog(LOG_ERR, "Stratum connection failed: %s", sctx->curl_err_str);
		goto fail;
	}

#if LIBCURL_VERSION_NUM >= 0x072d00
	curl_easy_getinfo(curl, CURLINFO_ACTIVESOCKET, &sctx->sock);
#elif LIBCURL_VERSION_NUM < 0x071101
	/* CURLINFO_LASTSOCKET is broken on Win64; only use it as a last resort */
	curl_easy_getinfo(curl, CURLINFO_LASTSOCKET, (long *)&sctx->sock);
#endif

	return true;

fail:
	curl_easy_cleanup(curl);
	sctx->curl = NULL;
	return false;
}
void stratum_disconnect(struct stratum_ctx *sctx)
{
pthread_mutex_lock(&sctx->sock_lock);
if (sctx->curl) {
curl_easy_cleanup(sctx->curl);
sctx->curl = NULL;
sctx->sockbuf[0] = '\0';
}
pthread_mutex_unlock(&sctx->sock_lock);
}
/*
 * Look up the session id in a mining.subscribe result.
 *
 * Element 0 of val is expected to be an array of arrays.  The first inner
 * array whose element 0 is the string "mining.notify" (compared without
 * regard to case) supplies its element 1 as the session id.
 *
 * Returns that string, which belongs to the JSON value, or NULL if it is
 * not found.  The search stops at the first entry that is not an array.
 */
static const char *get_stratum_session_id(json_t *val)
{
	json_t *subs = json_array_get(val, 0);
	int count, idx;

	if (!subs || !json_is_array(subs))
		return NULL;

	count = json_array_size(subs);
	for (idx = 0; idx < count; idx++) {
		json_t *entry = json_array_get(subs, idx);
		const char *name;

		if (!entry || !json_is_array(entry))
			break;
		name = json_string_value(json_array_get(entry, 0));
		if (name && !strcasecmp(name, "mining.notify"))
			return json_string_value(json_array_get(entry, 1));
	}
	return NULL;
}
/*
 * Send mining.subscribe and store the session parameters from the reply.
 *
 * The first attempt sends the user agent and, if one is known, the
 * previous session id.  If a reply arrives but is unusable, the request
 * is repeated once with an empty parameter list.
 *
 * On success, under sctx->work_lock, the session id, extranonce1 and
 * extranonce2 size are stored in sctx and next_diff is reset to 1.0.
 * Returns true on success, false otherwise.
 */
bool stratum_subscribe(struct stratum_ctx *sctx)
{
	char *s, *sret = NULL;
	const char *sid, *xnonce1;
	int xn2_size;
	size_t xn1_size;
	unsigned char *xn1_bin;
	json_t *val, *res_val, *err_val;
	json_error_t err;
	bool ret = false, retry = false;
	bool got_reply;	/* a reply line was received in this attempt */

start:
	/*
	 * Reset per-attempt state, so that a retry never sees the reply
	 * object that the previous attempt has already released.
	 */
	got_reply = false;
	val = NULL;

	s = malloc(128 + (sctx->session_id ? strlen(sctx->session_id) : 0));
	if (!s) {
		applog(LOG_ERR, "stratum_subscribe: out of memory");
		goto out;
	}
	if (retry)
		sprintf(s, "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": []}");
	else if (sctx->session_id)
		sprintf(s, "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": [\"" USER_AGENT "\", \"%s\"]}", sctx->session_id);
	else
		sprintf(s, "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": [\"" USER_AGENT "\"]}");

	if (!stratum_send_line(sctx, s)) {
		applog(LOG_ERR, "stratum_subscribe send failed");
		goto out;
	}

	if (!socket_full(sctx->sock, 30)) {
		applog(LOG_ERR, "stratum_subscribe timed out");
		goto out;
	}

	sret = stratum_recv_line(sctx);
	if (!sret)
		goto out;
	got_reply = true;

	val = JSON_LOADS(sret, &err);
	free(sret);
	sret = NULL;
	if (!val) {
		applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text);
		goto out;
	}

	res_val = json_object_get(val, "result");
	err_val = json_object_get(val, "error");

	if (!res_val || json_is_null(res_val) ||
	    (err_val && !json_is_null(err_val))) {
		if (opt_debug || retry) {
			/* reuse s for the error text; it is freed at out */
			free(s);
			if (err_val)
				s = json_dumps(err_val, JSON_INDENT(3));
			else
				s = strdup("(unknown reason)");
			applog(LOG_ERR, "JSON-RPC call failed: %s",
			       s ? s : "(unknown reason)");
		}
		goto out;
	}

	sid = get_stratum_session_id(res_val);
	if (opt_debug && !sid)
		applog(LOG_DEBUG, "Failed to get Stratum session id");
	xnonce1 = json_string_value(json_array_get(res_val, 1));
	if (!xnonce1) {
		applog(LOG_ERR, "Failed to get extranonce1");
		goto out;
	}
	xn2_size = json_integer_value(json_array_get(res_val, 2));
	if (!xn2_size) {
		applog(LOG_ERR, "Failed to get extranonce2_size");
		goto out;
	}
	if (xn2_size < 0 || xn2_size > 100) {
		applog(LOG_ERR, "Invalid value of extranonce2_size");
		goto out;
	}

	/* allocate before taking the lock, so failure leaves sctx untouched */
	xn1_size = strlen(xnonce1) / 2;
	xn1_bin = malloc(xn1_size);
	if (!xn1_bin && xn1_size) {
		applog(LOG_ERR, "stratum_subscribe: out of memory");
		goto out;
	}

	pthread_mutex_lock(&sctx->work_lock);
	free(sctx->session_id);
	free(sctx->xnonce1);
	sctx->session_id = sid ? strdup(sid) : NULL;
	sctx->xnonce1_size = xn1_size;
	sctx->xnonce1 = xn1_bin;
	hex2bin(sctx->xnonce1, xnonce1, sctx->xnonce1_size);
	sctx->xnonce2_size = xn2_size;
	sctx->next_diff = 1.0;
	pthread_mutex_unlock(&sctx->work_lock);

	if (opt_debug && sid)
		applog(LOG_DEBUG, "Stratum session id: %s", sctx->session_id);

	ret = true;

out:
	free(s);
	if (val) {
		json_decref(val);
		val = NULL;
	}

	/* retry once, and only if the server actually answered */
	if (!ret && got_reply && !retry) {
		retry = true;
		goto start;
	}

	return ret;
}
/*
 * Send mining.authorize for user/pass and wait for the reply.
 *
 * Lines received while waiting are first offered to
 * stratum_handle_method(); the first line it does not handle is taken as
 * the reply to the authorize request.
 *
 * The credentials are serialized by Jansson rather than pasted into the
 * request text, so quotes, backslashes and control characters in them
 * are escaped and cannot break the JSON.
 *
 * Returns true if the reply has a "result" that is not false and no
 * non-null "error".
 */
bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass)
{
	json_t *val = NULL, *res_val, *err_val, *params;
	char *s = NULL, *sret, *params_str = NULL;
	json_error_t err;
	bool ret = false;

	/* build ["user", "pass"] with proper JSON string escaping */
	params = json_array();
	if (params) {
		json_array_append_new(params, json_string(user));
		json_array_append_new(params, json_string(pass));
		/* fewer than two elements means a string was rejected */
		if (json_array_size(params) == 2)
			params_str = json_dumps(params, 0);
		json_decref(params);
	}
	if (!params_str) {
		applog(LOG_ERR, "Stratum authorize: failed to encode credentials");
		goto out;
	}

	s = malloc(80 + strlen(params_str));
	if (!s) {
		applog(LOG_ERR, "Stratum authorize: out of memory");
		goto out;
	}
	sprintf(s, "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": %s}",
		params_str);

	if (!stratum_send_line(sctx, s))
		goto out;

	while (1) {
		sret = stratum_recv_line(sctx);
		if (!sret)
			goto out;
		if (!stratum_handle_method(sctx, sret))
			break;
		free(sret);
	}

	val = JSON_LOADS(sret, &err);
	free(sret);
	if (!val) {
		applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text);
		goto out;
	}

	res_val = json_object_get(val, "result");
	err_val = json_object_get(val, "error");

	if (!res_val || json_is_false(res_val) ||
	    (err_val && !json_is_null(err_val))) {
		applog(LOG_ERR, "Stratum authentication failed");
		goto out;
	}

	ret = true;

out:
	free(s);
	free(params_str);
	if (val)
		json_decref(val);

	return ret;
}
/*
 * Handle a mining.notify message: validate its parameters and install
 * them as the current job in sctx->job.
 *
 * params is read by position: job id, previous hash, coinbase part 1,
 * coinbase part 2, array of Merkle branches, version, nbits, ntime and
 * the "clean" flag.
 *
 * Everything is validated and allocated before sctx->job is modified, so
 * a failure leaves the previous job in place (the coinbase buffer may
 * have been resized only once the allocation succeeded).  Returns true
 * on success, false on invalid parameters or allocation failure.
 */
static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
{
	const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *ntime;
	size_t coinb1_size, coinb2_size, coinbase_size;
	bool clean;
	int merkle_count, i;
	json_t *merkle_arr;
	unsigned char **merkle;
	char *job_id_copy;
	void *coinbase;

	job_id = json_string_value(json_array_get(params, 0));
	prevhash = json_string_value(json_array_get(params, 1));
	coinb1 = json_string_value(json_array_get(params, 2));
	coinb2 = json_string_value(json_array_get(params, 3));
	merkle_arr = json_array_get(params, 4);
	if (!merkle_arr || !json_is_array(merkle_arr))
		return false;
	merkle_count = json_array_size(merkle_arr);
	version = json_string_value(json_array_get(params, 5));
	nbits = json_string_value(json_array_get(params, 6));
	ntime = json_string_value(json_array_get(params, 7));
	clean = json_is_true(json_array_get(params, 8));

	if (!job_id || !prevhash || !coinb1 || !coinb2 || !version || !nbits || !ntime ||
	    strlen(prevhash) != 64 || strlen(version) != 8 ||
	    strlen(nbits) != 8 || strlen(ntime) != 8) {
		applog(LOG_ERR, "Stratum notify: invalid parameters");
		return false;
	}

	/* i counts the Merkle entries allocated so far, for cleanup at fail */
	i = 0;
	job_id_copy = strdup(job_id);
	merkle = malloc(merkle_count * sizeof(*merkle));
	if (!job_id_copy || (!merkle && merkle_count)) {
		applog(LOG_ERR, "Stratum notify: out of memory");
		goto fail;
	}
	for (i = 0; i < merkle_count; i++) {
		const char *s = json_string_value(json_array_get(merkle_arr, i));

		if (!s || strlen(s) != 64) {
			applog(LOG_ERR, "Stratum notify: invalid Merkle branch");
			goto fail;
		}
		merkle[i] = malloc(32);
		if (!merkle[i]) {
			applog(LOG_ERR, "Stratum notify: out of memory");
			goto fail;
		}
		hex2bin(merkle[i], s, 32);
	}

	pthread_mutex_lock(&sctx->work_lock);

	coinb1_size = strlen(coinb1) / 2;
	coinb2_size = strlen(coinb2) / 2;
	coinbase_size = coinb1_size + sctx->xnonce1_size +
	                sctx->xnonce2_size + coinb2_size;
	/* keep the old buffer until realloc is known to have worked */
	coinbase = realloc(sctx->job.coinbase, coinbase_size);
	if (!coinbase && coinbase_size) {
		pthread_mutex_unlock(&sctx->work_lock);
		applog(LOG_ERR, "Stratum notify: out of memory");
		goto fail;
	}
	sctx->job.coinbase_size = coinbase_size;
	sctx->job.coinbase = coinbase;
	/* layout: coinb1 | extranonce1 | extranonce2 | coinb2 */
	sctx->job.xnonce2 = sctx->job.coinbase + coinb1_size + sctx->xnonce1_size;
	hex2bin(sctx->job.coinbase, coinb1, coinb1_size);
	memcpy(sctx->job.coinbase + coinb1_size, sctx->xnonce1, sctx->xnonce1_size);
	/* extranonce2 restarts from zero only when the job id changes */
	if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id))
		memset(sctx->job.xnonce2, 0, sctx->xnonce2_size);
	hex2bin(sctx->job.xnonce2 + sctx->xnonce2_size, coinb2, coinb2_size);

	free(sctx->job.job_id);
	sctx->job.job_id = job_id_copy;
	hex2bin(sctx->job.prevhash, prevhash, 32);

	for (i = 0; i < sctx->job.merkle_count; i++)
		free(sctx->job.merkle[i]);
	free(sctx->job.merkle);
	sctx->job.merkle = merkle;
	sctx->job.merkle_count = merkle_count;

	hex2bin(sctx->job.version, version, 4);
	hex2bin(sctx->job.nbits, nbits, 4);
	hex2bin(sctx->job.ntime, ntime, 4);
	sctx->job.clean = clean;

	sctx->job.diff = sctx->next_diff;

	pthread_mutex_unlock(&sctx->work_lock);

	return true;

fail:
	while (i-- > 0)
		free(merkle[i]);
	free(merkle);
	free(job_id_copy);
	return false;
}
/*
 * Handle mining.set_difficulty: store the first parameter as
 * sctx->next_diff under sctx->work_lock.
 *
 * Returns false if the value reads as 0 (which json_number_value() also
 * yields for a missing or non-numeric parameter), true otherwise.
 */
static bool stratum_set_difficulty(struct stratum_ctx *sctx, json_t *params)
{
	const double requested = json_number_value(json_array_get(params, 0));

	if (requested == 0)
		return false;

	pthread_mutex_lock(&sctx->work_lock);
	sctx->next_diff = requested;
	pthread_mutex_unlock(&sctx->work_lock);

	if (opt_debug)
		applog(LOG_DEBUG, "Stratum difficulty set to %g", requested);

	return true;
}
/*
 * Handle client.reconnect: build a new URL from the scheme of the current
 * sctx->url plus the host and port given in params.
 *
 * The port may be given as a string or as an integer.  When opt_redirect
 * is off the request is only logged.  Otherwise sctx->url is replaced and
 * the connection is dropped with stratum_disconnect().
 *
 * Returns false if host or port is missing or zero, if the current URL
 * has no "://" separator, or if memory runs out; true otherwise.
 */
static bool stratum_reconnect(struct stratum_ctx *sctx, json_t *params)
{
	json_t *port_val;
	char *url;
	const char *host, *sep;
	size_t scheme_len;
	int port;

	host = json_string_value(json_array_get(params, 0));
	port_val = json_array_get(params, 1);
	if (json_is_string(port_val))
		port = atoi(json_string_value(port_val));
	else
		port = json_integer_value(port_val);
	if (!host || !port)
		return false;

	/*
	 * Find the scheme in the terminated current URL itself.  Searching a
	 * strncpy()ed prefix instead would run strstr() over a buffer that
	 * has no NUL terminator.
	 */
	sep = sctx->url ? strstr(sctx->url, "://") : NULL;
	if (!sep)
		return false;
	scheme_len = (size_t)(sep - sctx->url) + 3;

	/* 32 extra bytes cover ':', the decimal port and the terminator */
	url = malloc(scheme_len + strlen(host) + 32);
	if (!url)
		return false;
	memcpy(url, sctx->url, scheme_len);
	sprintf(url + scheme_len, "%s:%d", host, port);

	if (!opt_redirect) {
		applog(LOG_INFO, "Ignoring request to reconnect to %s", url);
		free(url);
		return true;
	}

	applog(LOG_NOTICE, "Server requested reconnection to %s", url);

	free(sctx->url);
	sctx->url = url;
	stratum_disconnect(sctx);

	return true;
}
/*
 * Answer client.get_version: send a reply carrying the request id, a null
 * error and USER_AGENT as the result.
 *
 * Returns false if id is missing or null, if the reply cannot be built,
 * or if sending fails.
 */
static bool stratum_get_version(struct stratum_ctx *sctx, json_t *id)
{
	char *s;
	json_t *val;
	bool ret;

	if (!id || json_is_null(id))
		return false;

	val = json_object();
	if (!val)
		return false;
	json_object_set(val, "id", id);
	json_object_set_new(val, "error", json_null());
	json_object_set_new(val, "result", json_string(USER_AGENT));
	s = json_dumps(val, 0);
	json_decref(val);
	/* a failed dump must not reach the send path, which calls strlen() */
	if (!s)
		return false;

	ret = stratum_send_line(sctx, s);
	free(s);

	return ret;
}
/*
 * Handle client.show_message: log the first parameter if it is a string
 * and, when the request has a non-null id, acknowledge it with a reply
 * whose result is true.
 *
 * Returns true when no acknowledgement is required; otherwise false if
 * the reply cannot be built or sent, true if it was sent.
 */
static bool stratum_show_message(struct stratum_ctx *sctx, json_t *id, json_t *params)
{
	char *s;
	json_t *val;
	const char *msg;
	bool ret;

	/* only a string can be printed with %s; anything else is skipped */
	msg = json_string_value(json_array_get(params, 0));
	if (msg)
		applog(LOG_NOTICE, "MESSAGE FROM SERVER: %s", msg);

	if (!id || json_is_null(id))
		return true;

	val = json_object();
	if (!val)
		return false;
	json_object_set(val, "id", id);
	json_object_set_new(val, "error", json_null());
	json_object_set_new(val, "result", json_true());
	s = json_dumps(val, 0);
	json_decref(val);
	/* a failed dump must not reach the send path, which calls strlen() */
	if (!s)
		return false;

	ret = stratum_send_line(sctx, s);
	free(s);

	return ret;
}
/*
 * Parse one line from the server and, if it is a method call this client
 * knows, dispatch it to the matching handler.
 *
 * Method names are compared without regard to case.  Returns the
 * handler's result; false if the line is not valid JSON, has no string
 * "method" member, or names an unknown method.
 */
bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
{
	json_error_t err;
	json_t *msg, *id, *params;
	const char *method;
	bool handled = false;

	msg = JSON_LOADS(s, &err);
	if (!msg) {
		applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text);
		return false;
	}

	method = json_string_value(json_object_get(msg, "method"));
	if (method) {
		id = json_object_get(msg, "id");
		params = json_object_get(msg, "params");

		if (!strcasecmp(method, "mining.notify"))
			handled = stratum_notify(sctx, params);
		else if (!strcasecmp(method, "mining.set_difficulty"))
			handled = stratum_set_difficulty(sctx, params);
		else if (!strcasecmp(method, "client.reconnect"))
			handled = stratum_reconnect(sctx, params);
		else if (!strcasecmp(method, "client.get_version"))
			handled = stratum_get_version(sctx, id);
		else if (!strcasecmp(method, "client.show_message"))
			handled = stratum_show_message(sctx, id, params);
	}

	json_decref(msg);
	return handled;
}
/*
 * Allocate a zero-filled thread queue and initialize its list head, mutex
 * and condition variable.  Returns NULL if the allocation fails.
 */
struct thread_q *tq_new(void)
{
	struct thread_q *tq = calloc(1, sizeof(*tq));

	if (tq != NULL) {
		INIT_LIST_HEAD(&tq->q);
		pthread_mutex_init(&tq->mutex, NULL);
		pthread_cond_init(&tq->cond, NULL);
	}
	return tq;
}
/*
 * Destroy a thread queue: unlink and free every entry still queued,
 * destroy the condition variable and mutex, then free the queue itself.
 * The data pointers held by the entries are not freed.  A NULL tq is
 * ignored.
 */
void tq_free(struct thread_q *tq)
{
	struct tq_ent *cur, *next;

	if (tq == NULL)
		return;

	list_for_each_entry_safe(cur, next, &tq->q, q_node, struct tq_ent) {
		list_del(&cur->q_node);
		free(cur);
	}

	pthread_cond_destroy(&tq->cond);
	pthread_mutex_destroy(&tq->mutex);

	memset(tq, 0, sizeof(*tq));	/* poison */
	free(tq);
}
/*
 * Set the queue's frozen flag under its mutex and signal the condition
 * variable, so that a thread waiting on the queue wakes up.
 */
static void tq_freezethaw(struct thread_q *tq, bool frozen)
{
	pthread_mutex_lock(&tq->mutex);

	tq->frozen = frozen;
	pthread_cond_signal(&tq->cond);

	pthread_mutex_unlock(&tq->mutex);
}
/* Mark the queue as frozen and wake a waiter. */
void tq_freeze(struct thread_q *tq)
{
	tq_freezethaw(tq, true);
}
/* Clear the queue's frozen flag and wake a waiter. */
void tq_thaw(struct thread_q *tq)
{
	tq_freezethaw(tq, false);
}
/*
 * Append data to the tail of the queue and signal the condition variable.
 *
 * Returns false, without queueing anything, if the entry cannot be
 * allocated or the queue is frozen.  The condition variable is signalled
 * in the frozen case as well.
 */
bool tq_push(struct thread_q *tq, void *data)
{
	struct tq_ent *ent = calloc(1, sizeof(*ent));
	bool queued;

	if (!ent)
		return false;

	ent->data = data;
	INIT_LIST_HEAD(&ent->q_node);

	pthread_mutex_lock(&tq->mutex);

	queued = !tq->frozen;
	if (queued)
		list_add_tail(&ent->q_node, &tq->q);
	else
		free(ent);

	pthread_cond_signal(&tq->cond);
	pthread_mutex_unlock(&tq->mutex);

	return queued;
}
/*
 * Remove the entry at the head of the queue and return its data pointer.
 *
 * If the queue is empty, the call waits on the condition variable once:
 * until abstime if it is non-NULL, indefinitely otherwise.  NULL is
 * returned if that wait fails or times out, or if the queue is still
 * empty when the wait ends.
 */
void *tq_pop(struct thread_q *tq, const struct timespec *abstime)
{
	void *rval = NULL;
	bool have_entry;

	pthread_mutex_lock(&tq->mutex);

	have_entry = !list_empty(&tq->q);
	if (!have_entry) {
		int rc;

		if (abstime)
			rc = pthread_cond_timedwait(&tq->cond, &tq->mutex, abstime);
		else
			rc = pthread_cond_wait(&tq->cond, &tq->mutex);
		have_entry = (rc == 0) && !list_empty(&tq->q);
	}

	if (have_entry) {
		struct tq_ent *ent = list_entry(tq->q.next, struct tq_ent, q_node);

		rval = ent->data;
		list_del(&ent->q_node);
		free(ent);
	}

	pthread_mutex_unlock(&tq->mutex);
	return rval;
}
07070100000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000B00000000TRAILER!!!1244 blocks