File mergerfs-tools-0.0+git.20230911T192423~80d6c95.obscpio of Package mergerfs-tools
07070100000000000081A400000000000000000000000164FFAFB7000002D6000000000000000000000000000000000000003A00000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/.gitignore# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
07070100000001000081A400000000000000000000000164FFAFB7000002E2000000000000000000000000000000000000003700000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/LICENSECopyright (c) 2016, Antonio SJ Musumeci
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
07070100000002000081A400000000000000000000000164FFAFB7000005CF000000000000000000000000000000000000003800000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/Makefile# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# Locate install(1) once, at parse time. A simple (`:=`) assignment keeps
# the shell lookup from being re-run on every expansion of $(INSTALL).
INSTALL := $(shell which install)

# Installation directories. Each one is derived from the previous ones, so
# overriding PREFIX (or any intermediate variable) on the command line
# relocates everything below it.
PREFIX = /usr/local
EXEC_PREFIX = $(PREFIX)
DATAROOTDIR = $(PREFIX)/share
DATADIR = $(DATAROOTDIR)
BINDIR = $(EXEC_PREFIX)/bin
SBINDIR = $(EXEC_PREFIX)/sbin
MANDIR = $(DATAROOTDIR)/man
MAN1DIR = $(MANDIR)/man1

# Final destinations, prefixed with DESTDIR for staged installs.
INSTALLBINDIR = $(DESTDIR)$(BINDIR)
INSTALLSBINDIR = $(DESTDIR)$(SBINDIR)
INSTALLMAN1DIR = $(DESTDIR)$(MAN1DIR)

# Tools copied from src/ by the install target.
APPS = mergerfs.fsck \
       mergerfs.mktrash \
       mergerfs.dup \
       mergerfs.dedup \
       mergerfs.ctl \
       mergerfs.balance \
       mergerfs.consolidate
# `install` names an action, not a file: declare it phony so that a stray
# file called "install" can never make the target look up to date.
.PHONY: install

# Copy every tool listed in $(APPS) from src/ into $(INSTALLBINDIR) with
# mode 0755, creating leading directories (-D). `|| exit 1` aborts on the
# first failed copy; without it the loop's status is that of the last
# iteration only, hiding earlier failures.
install:
	@for APP in $(APPS); \
	do \
	  $(INSTALL) -v -m 0755 -D "src/$$APP" "$(INSTALLBINDIR)/$$APP" || exit 1; \
	done
07070100000003000081A400000000000000000000000164FFAFB700002C5E000000000000000000000000000000000000003900000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/README.md# mergerfs-tools
Optional tools to help manage data in a mergerfs pool.
## INSTALL
All of these supplemental tools are self-contained Python 3 apps. Make sure you have Python 3 installed and either run `make install` or copy the file to `/usr/local/bin` (or wherever you keep your binaries) and make it executable (`chmod +x`).
## TOOLS
### mergerfs.ctl
A wrapper around the mergerfs xattr interface.
[Download latest](https://raw.githubusercontent.com/trapexit/mergerfs-tools/master/src/mergerfs.ctl)
```
$ mergerfs.ctl -h
usage: mergerfs.ctl [-h] [-m MOUNT] {add,remove,list,get,set,info} ...
positional arguments:
{add,remove,list,get,set,info}
optional arguments:
-h, --help show this help message and exit
-m MOUNT, --mount MOUNT
mergerfs mount to act on
$ mergerfs.ctl info
- mount: /storage
version: 2.14.0
pid: 1234
srcmounts:
- /mnt/drive0
- /mnt/drive1
$ mergerfs.ctl -m /storage add path /mnt/drive2
$ mergerfs.ctl info
- mount: /storage
version: 2.14.0
pid: 1234
srcmounts:
- /mnt/drive0
- /mnt/drive1
- /mnt/drive2
```
### mergerfs.fsck
Audits permissions and ownership of files and directories in a mergerfs mount and allows for manual and automatic fixing of them.
It's possible that files or directories can be duplicated across multiple drives and that their metadata (permissions, ownership, etc.) become out of sync. This can cause some strange behavior depending on the mergerfs policies used. This tool helps find and fix those inconsistencies.
[Download latest](https://raw.githubusercontent.com/trapexit/mergerfs-tools/master/src/mergerfs.fsck)
```
$ mergerfs.fsck -h
usage: mergerfs.fsck [-h] [-v] [-s] [-f {manual,newest,nonroot}] dir
audit a mergerfs mount for inconsistencies
positional arguments:
dir starting directory
optional arguments:
-h, --help show this help message and exit
-v, --verbose print details of audit item
-s, --size only consider if the size is the same
-f {manual,newest,nonroot}, --fix {manual,newest,nonroot}
fix policy
$ mergerfs.fsck -v -f manual /path/to/dir
```
### mergerfs.dup
Duplicates files & directories across branches in a pool. The file selected for duplication is picked by the `dup` option. Files will be copied to drives with the most free space. Deleted from others if `prune` is enabled.
See usage for more. Run as `root`. Requires `rsync` to be installed.
[Download latest](https://raw.githubusercontent.com/trapexit/mergerfs-tools/master/src/mergerfs.dup)
```
usage: mergerfs.dup [<options>] <dir>
Duplicate files & directories across multiple drives in a pool.
Will print out commands for inspection and out of band use.
positional arguments:
dir starting directory
optional arguments:
-c, --count= Number of copies to create. (default: 2)
-d, --dup= Which file (if more than one exists) to choose to
duplicate. Each one falls back to `mergerfs` if
all files have the same value. (default: newest)
* newest : file with largest mtime
* oldest : file with smallest mtime
* smallest : file with smallest size
* largest : file with largest size
* mergerfs : file chosen by mergerfs' getattr
-p, --prune Remove files above `count`. Without this enabled
it will update all existing files.
-e, --execute Execute `rsync` and `rm` commands. Not just
print them.
-I, --include= fnmatch compatible filter to include files.
Can be used multiple times.
-E, --exclude= fnmatch compatible filter to exclude files.
Can be used multiple times.
```
### mergerfs.dedup
Finds and removes duplicate files across mergerfs pool's branches. Use the
`ignore`, `dedup`, and `strict` options to target specific use cases.
[Download latest](https://raw.githubusercontent.com/trapexit/mergerfs-tools/master/src/mergerfs.dedup)
```
usage: mergerfs.dedup [<options>] <dir>
Remove duplicate files across branches of a mergerfs pool. Provides
multiple algos for determining which file to keep and what to skip.
positional arguments:
dir Starting directory
optional arguments:
-v, --verbose Once to print `rm` commands
Twice for status info
Three for file info
-i, --ignore= Ignore files if... (default: none)
* same-size : have the same size
* different-size : have different sizes
* same-time : have the same mtime
* different-time : have different mtimes
* same-hash : have the same md5sum
* different-hash : have different md5sums
-d, --dedup= What file to *keep* (default: newest)
* manual : ask user
* oldest : file with smallest mtime
* newest : file with largest mtime
* largest : file with largest size
* smallest : file with smallest size
* mostfreespace : file on drive with most free space
-s, --strict Skip dedup if all files have same value.
Only applies to oldest, newest, largest, smallest.
-e, --execute Will not perform file removal without this.
-I, --include= fnmatch compatible filter to include files.
Can be used multiple times.
-E, --exclude= fnmatch compatible filter to exclude files.
Can be used multiple times.
# mergerfs.dedup /path/to/dir
# Total savings: 10.0GB
# mergerfs.dedup -e -d newest /path/to/dir
mergerfs.dedup -v -d newest /media/tmp/test
rm -vf /mnt/drive0/test/foo
rm -vf /mnt/drive1/test/foo
rm -vf /mnt/drive2/test/foo
rm -vf /mnt/drive3/test/foo
# Total savings: 10.0B
```
### mergerfs.balance
Will move files from the most filled drive (percentage wise) to the least filled drive. Will do so till the most and least filled drives come within a user defined percentage range (defaults to 2%).
Run as `root`. Requires `rsync` to be installed.
[Download latest](https://raw.githubusercontent.com/trapexit/mergerfs-tools/master/src/mergerfs.balance)
```
usage: mergerfs.balance [-h] [-p PERCENTAGE] [-i INCLUDE] [-e EXCLUDE]
[-I INCLUDEPATH] [-E EXCLUDEPATH] [-s EXCLUDELT]
[-S EXCLUDEGT]
dir
balance files on a mergerfs mount based on percentage drive filled
positional arguments:
dir starting directory
optional arguments:
-h, --help show this help message and exit
-p PERCENTAGE percentage range of freespace (default 2.0)
-i INCLUDE, --include INCLUDE
fnmatch compatible file filter (can use multiple
times)
-e EXCLUDE, --exclude EXCLUDE
fnmatch compatible file filter (can use multiple
times)
-I INCLUDEPATH, --include-path INCLUDEPATH
fnmatch compatible path filter (can use multiple
times)
-E EXCLUDEPATH, --exclude-path EXCLUDEPATH
fnmatch compatible path filter (can use multiple
times)
-s EXCLUDELT exclude files smaller than <int>[KMGT] bytes
-S EXCLUDEGT exclude files larger than <int>[KMGT] bytes
# mergerfs.balance /media
from: /mnt/drive1/foo/bar
to: /mnt/drive2/foo/bar
rsync ...
```
### mergerfs.consolidate
Consolidate **files** in a **single** mergerfs directory onto a **single** drive, recursively. This does **NOT** move all files at and below that directory to 1 drive. If you want to move data between drives simply use normal rsync or similar. This tool is only useful in niche use cases where the person wants to colocate the files of their TV shows, music, etc. onto a single drive *after the fact.* If you really wanted that you should probably use path preservation. For most people there are only downsides to using path preservation or colocating files.
Run as `root`. Requires `rsync` to be installed.
[Download latest](https://raw.githubusercontent.com/trapexit/mergerfs-tools/master/src/mergerfs.consolidate)
```
usage: mergerfs.consolidate [<options>] <dir>
positional arguments:
dir starting directory
optional arguments:
-m, --max-files= Skip directories with more than N files.
(default: 256)
-M, --max-size= Skip directories with files adding up to more
than N. (default: 16G)
-I, --include-path= fnmatch compatible path include filter.
Can be used multiple times.
-E, --exclude-path= fnmatch compatible path exclude filter.
Can be used multiple times.
-e, --execute Execute `rsync` commands as well as print them.
-h, --help Print this help.
```
## SUPPORT
#### Contact / Issue submission
* github.com: https://github.com/trapexit/mergerfs-tools/issues
* email: trapexit@spawn.link
* twitter: https://twitter.com/_trapexit
#### Support development
This software is free to use and released under a very liberal license. That said if you like this software and would like to support its development donations are welcome.
* PayPal: https://paypal.me/trapexit
* GitHub Sponsors: https://github.com/sponsors/trapexit
* Patreon: https://www.patreon.com/trapexit
* SubscribeStar: https://www.subscribestar.com/trapexit
* Ko-Fi: https://ko-fi.com/trapexit
* Open Collective: https://opencollective.com/trapexit
* Bitcoin (BTC): bc1qjwlywkqxgrxql3m7a7fvcsf3z3t98jvtekqp2j
* Bitcoin Cash (BCH): qrvymmkvuk7703m7cx0pqxc3mz4mmsn6ngn9xw52kc
* Bitcoin SV (BSV): 1FkFuxRtt3f8LbkpeUKRZq7gKJFzGSGgZV
* Bitcoin Gold (BTG): Gfk8QbMJFgpMTcY7uB63axy6HU7uTPPWNj
* Basic Attention Token (BAT): 0x6241857fa5fb7667FB7a792b13E83fDEabe96f7F
* Chainlink (LINK): 0x6241857fa5fb7667FB7a792b13E83fDEabe96f7F
* Dash (DASH): Xu2U3Nd3G4hM5TRQUBcP4DHJFzXH93xB84
* Dogecoin (DOGE): DGFBPsRBYL8wHbgnvKbYkVn5FvAe854p1c
* Ethereum (ETH): 0x6241857fa5fb7667FB7a792b13E83fDEabe96f7F
* Filecoin (FIL): f1wpypkjcluufzo74yha7p67nbxepzizlroockgcy
* LBRY Credits (LBC): bFusyoZPkSuzM2Pr8mcthgvkymaosJZt5r
* Litecoin (LTC): LfL7jLNYuVpy7v5TyRyc3yRZ2uhqc4UoR3
* Monero (XMR): 45BBZMrJwPSaFwSoqLVNEggWR2BJJsXxz7bNz8FXnnFo3GyhVJFSCrCFSS7zYwDa9r1TmFmGMxQ2HTntuc11yZ9q1LeCE8f
* Tezos (XTZ): tz1ZxerkbbALsuU9XGV9K9fFpuLWnKAGfc1C
* Zcash (ZEC): t1bjbVBK7tx9EGBrnD2wDfjGV9yZrcyfMmr
* Other crypto currencies: contact me for address
## LINKS
* https://spawn.link
* https://github.com/trapexit/mergerfs
* https://github.com/trapexit/mergerfs/wiki
* https://github.com/trapexit/mergerfs-tools
* https://github.com/trapexit/scorch
* https://github.com/trapexit/bbf
* https://github.com/trapexit/backup-and-recovery-howtos
07070100000004000041ED00000000000000000000000264FFAFB700000000000000000000000000000000000000000000003300000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/src07070100000005000081ED00000000000000000000000164FFAFB7000025D4000000000000000000000000000000000000004400000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/src/mergerfs.balance#!/usr/bin/env python3
# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import argparse
import ctypes
import errno
import fnmatch
import io
import os
import shlex
import subprocess
import sys
_libc = ctypes.CDLL("libc.so.6",use_errno=True)
_lgetxattr = _libc.lgetxattr
_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
def lgetxattr(path,name):
    """Fetch extended attribute `name` of `path` through libc's lgetxattr.

    str arguments are encoded to bytes before the call. The value is
    returned decoded to str; None is returned when libc reports ENODATA.
    Any other errno is raised as IOError(errno, message, path).
    """
    if type(path) is str:
        path = path.encode(errors='backslashreplace')
    if type(name) is str:
        name = name.encode(errors='backslashreplace')

    size = 64
    while True:
        buf = ctypes.create_string_buffer(size)
        got = _lgetxattr(path,name,buf,ctypes.c_size_t(size))
        if got >= 0:
            return buf.raw[:got].decode(errors='backslashreplace')

        code = ctypes.get_errno()
        if code == errno.ENODATA:
            return None
        if code != errno.ERANGE:
            raise IOError(code,os.strerror(code),path)
        # ERANGE: the buffer was too small; retry with twice the space.
        size *= 2
def ismergerfs(path):
    """Return True when `path` carries the 'user.mergerfs.version' xattr.

    lgetxattr() reports a missing attribute (ENODATA) by returning None
    rather than raising, so the value itself has to be checked; otherwise
    any ordinary file without the attribute would be accepted. Every other
    lookup failure surfaces as IOError and also yields False.
    """
    try:
        return lgetxattr(path,'user.mergerfs.version') is not None
    except IOError:
        return False
def mergerfs_control_file(basedir):
    """Find the nearest '.mergerfs' entry at or above `basedir`.

    Walks from `basedir` towards the filesystem root and returns the first
    existing '<dir>/.mergerfs' path. Returns None once '/' is reached ('/'
    itself is not probed) or when the path cannot be shortened any further,
    which also terminates the search for empty or relative paths instead of
    recursing without bound.
    """
    while basedir != '/':
        ctrlfile = os.path.join(basedir,'.mergerfs')
        if os.path.exists(ctrlfile):
            return ctrlfile
        parent = os.path.dirname(basedir)
        if parent == basedir:
            # dirname() reached a fixed point ('' or '//'): nothing left.
            return None
        basedir = parent
    return None
def mergerfs_srcmounts(ctrlfile):
    """Return the 'user.mergerfs.srcmounts' xattr of `ctrlfile` split on ':'."""
    raw = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
    return raw.split(':')
def match(filename,matches):
    """Return True if `filename` matches any fnmatch pattern in `matches`."""
    return any(fnmatch.fnmatch(filename,pattern) for pattern in matches)
def exclude_by_size(filepath,exclude_lt,exclude_gt):
    """Decide whether `filepath` is excluded by the size limits.

    Returns True when `exclude_lt` is non-zero and the file is smaller than
    it, or when `exclude_gt` is non-zero and the file is larger than it.
    A file that cannot be lstat()ed is not excluded (returns False).
    """
    try:
        size = os.lstat(filepath).st_size
    except OSError:
        # Only filesystem errors are treated as "cannot tell"; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        return False

    if exclude_lt and size < exclude_lt:
        return True
    if exclude_gt and size > exclude_gt:
        return True
    return False
def find_a_file(src,
                relpath,
                file_includes,file_excludes,
                path_includes,path_excludes,
                exclude_lt,exclude_gt):
    """Return the first file under `src`/`relpath` that passes every filter.

    Walks the tree with os.walk and tests each file, in order, against the
    name excludes, path excludes, name includes, path includes and size
    limits. The first file that survives is returned as a path relative to
    `src`; None is returned when no file qualifies.
    """
    start = os.path.join(src,relpath)
    for (dirpath,_dirnames,filenames) in os.walk(start):
        for name in filenames:
            candidate = os.path.join(dirpath,name)
            rejected = (match(name,file_excludes) or
                        match(candidate,path_excludes) or
                        not match(name,file_includes) or
                        not match(candidate,path_includes) or
                        exclude_by_size(candidate,exclude_lt,exclude_gt))
            if not rejected:
                return os.path.relpath(candidate,src)
    return None
def execute(args):
    """Run the command `args` with subprocess.call and return its exit status."""
    status = subprocess.call(args)
    return status
def print_args(args):
    """Print `args` as one shell-quoted, space-separated command line."""
    print(' '.join(map(shlex.quote,args)))
def build_move_file(src,dst,relfile):
    """Build the rsync argument list that moves `relfile` from `src` to `dst`.

    The source is spelled '<src>/./<relfile>' and passed together with
    --relative; the destination is `dst` with a trailing slash.
    --remove-source-files is included in the command.
    """
    command = ['rsync',
               '-avlHAXWE',
               '--relative',
               '--progress',
               '--remove-source-files']
    command.append(os.path.join(src,'./',relfile))
    command.append(dst+'/')
    return command
def freespace_percentage(srcmounts):
    """Return (mount, free_fraction) pairs sorted by ascending free fraction.

    The fraction is f_bavail / f_blocks (both scaled by f_frsize) as
    reported by os.statvfs for each mount.
    """
    def free_fraction(mount):
        vfs = os.statvfs(mount)
        return (vfs.f_bavail * vfs.f_frsize) / (vfs.f_blocks * vfs.f_frsize)

    pairs = [(mount,free_fraction(mount)) for mount in srcmounts]
    pairs.sort(key=lambda pair: pair[1])
    return pairs
def all_within_range(l,n):
    """Return True when the first and last entries of `l` differ by at most `n`.

    `l` is a list of (name, value) pairs; only the values of the first and
    last pair are compared. Lists with fewer than two entries are always
    within range.
    """
    if len(l) < 2:
        return True
    first = l[0][1]
    last = l[-1][1]
    return abs(first - last) <= n
def human_to_bytes(s):
    """Convert '<int>' or '<int>[KMGT]' to an integer number of bytes.

    The suffixes K, M, G and T multiply by successive powers of 1024; any
    other final character means the whole string is parsed as an integer.
    """
    multipliers = {'K': 1024,
                   'M': 1024 * 1024,
                   'G': 1024 * 1024 * 1024,
                   'T': 1024 * 1024 * 1024 * 1024}
    suffix = s[-1]
    if suffix in multipliers:
        return int(s[0:-1]) * multipliers[suffix]
    return int(s)
def buildargparser():
    """Create the command-line parser for mergerfs.balance.

    Options: positional `dir`, -p (float, default 2.0), the repeatable
    filters -i/-e/-I/-E (lists, default empty) and the size limits -s/-S
    (strings, default '0').
    """
    parser = argparse.ArgumentParser(description='balance files on a mergerfs mount based on percentage drive filled')
    add = parser.add_argument

    add('dir',
        type=str,
        help='starting directory')
    add('-p',
        dest='percentage',
        type=float,
        default=2.0,
        help='percentage range of freespace (default 2.0)')

    # The four filter options differ only in flags, destination and help.
    filters = [
        (('-i','--include'),'include',
         'fnmatch compatible file filter (can use multiple times)'),
        (('-e','--exclude'),'exclude',
         'fnmatch compatible file filter (can use multiple times)'),
        (('-I','--include-path'),'includepath',
         'fnmatch compatible path filter (can use multiple times)'),
        (('-E','--exclude-path'),'excludepath',
         'fnmatch compatible path filter (can use multiple times)'),
    ]
    for (flags,dest,text) in filters:
        add(*flags,
            dest=dest,
            type=str,
            action='append',
            default=[],
            help=text)

    size_limits = [
        ('-s','excludelt','exclude files smaller than <int>[KMGT] bytes'),
        ('-S','excludegt','exclude files larger than <int>[KMGT] bytes'),
    ]
    for (flag,dest,text) in size_limits:
        add(flag,
            dest=dest,
            type=str,
            default='0',
            help=text)

    return parser
def main():
    """Entry point of mergerfs.balance.

    Repeatedly moves one file from the branch with the smallest free-space
    fraction to the branch with the largest one, until the two extremes are
    within the requested percentage of each other, no file can be found, or
    the transfer command fails. Exits with status 1 when the directory is
    not on a mergerfs mount, otherwise with status 0.
    """
    def utf8_wrap(stream):
        # Re-wrap a standard stream so unencodable characters are escaped.
        return io.TextIOWrapper(stream.buffer,
                                encoding='utf8',
                                errors="backslashreplace",
                                line_buffering=True)

    sys.stdout = utf8_wrap(sys.stdout)
    sys.stderr = utf8_wrap(sys.stderr)

    opts = buildargparser().parse_args()
    opts.dir = os.path.realpath(opts.dir)

    ctrlfile = mergerfs_control_file(opts.dir)
    if not ismergerfs(ctrlfile):
        print("%s is not a mergerfs mount" % opts.dir)
        sys.exit(1)

    # Path of the starting directory relative to the mount point.
    mntpoint = os.path.dirname(ctrlfile)
    relpath = os.path.relpath(opts.dir,mntpoint) if opts.dir != mntpoint else ''

    file_includes = opts.include if opts.include else ['*']
    file_excludes = opts.exclude
    path_includes = opts.includepath if opts.includepath else ['*']
    path_excludes = opts.excludepath
    exclude_lt = human_to_bytes(opts.excludelt)
    exclude_gt = human_to_bytes(opts.excludegt)
    srcmounts = mergerfs_srcmounts(ctrlfile)
    threshold = opts.percentage / 100

    try:
        usage = freespace_percentage(srcmounts)
        while not all_within_range(usage,threshold):
            # usage is sorted by ascending free fraction: the last entry is
            # the emptiest branch, the first the fullest.
            todrive = usage[-1][0]
            relfilepath = None
            while not relfilepath and len(usage):
                fromdrive = usage.pop(0)[0]
                relfilepath = find_a_file(fromdrive,
                                          relpath,
                                          file_includes,file_excludes,
                                          path_includes,path_excludes,
                                          exclude_lt,exclude_gt)
            if len(usage) == 0:
                print('Could not find file to transfer: exiting...')
                break
            if fromdrive == todrive:
                print('Source drive == target drive: exiting...')
                break

            command = build_move_file(fromdrive,todrive,relfilepath)
            print('file: {}\nfrom: {}\nto: {}'.format(relfilepath,fromdrive,todrive))
            print_args(command)
            status = execute(command)
            if status:
                print('ERROR - exited with exit code: {}'.format(status))
                break
            usage = freespace_percentage(srcmounts)

        print('Branches within {:.1%} range: '.format(threshold))
        for (branch,free) in usage:
            print(' * {}: {:.2%} free'.format(branch,free))
    except KeyboardInterrupt:
        print("exiting: CTRL-C pressed")

    sys.exit(0)
if __name__ == "__main__":
main()
07070100000006000081ED00000000000000000000000164FFAFB700002081000000000000000000000000000000000000004800000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/src/mergerfs.consolidate#!/usr/bin/env python3
# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import argparse
import ctypes
import errno
import fnmatch
import io
import os
import shlex
import stat
import subprocess
import sys
_libc = ctypes.CDLL("libc.so.6",use_errno=True)
_lgetxattr = _libc.lgetxattr
_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
def lgetxattr(path,name):
    """Fetch extended attribute `name` of `path` through libc's lgetxattr.

    str arguments are encoded to bytes before the call. The raw value is
    returned as bytes; None is returned when libc reports ENODATA. Any
    other errno is raised as IOError(errno, message, path).
    """
    if type(path) is str:
        path = path.encode(errors='backslashreplace')
    if type(name) is str:
        name = name.encode(errors='backslashreplace')

    size = 64
    while True:
        buf = ctypes.create_string_buffer(size)
        got = _lgetxattr(path,name,buf,ctypes.c_size_t(size))
        if got >= 0:
            return buf.raw[:got]

        code = ctypes.get_errno()
        if code == errno.ENODATA:
            return None
        if code != errno.ERANGE:
            raise IOError(code,os.strerror(code),path)
        # ERANGE: the buffer was too small; retry with twice the space.
        size *= 2
def xattr_relpath(fullpath):
    """Return the 'user.mergerfs.relpath' xattr of `fullpath` decoded to str."""
    raw = lgetxattr(fullpath,'user.mergerfs.relpath')
    return raw.decode(errors='backslashreplace')
def xattr_basepath(fullpath):
    """Return the 'user.mergerfs.basepath' xattr of `fullpath` decoded to str."""
    raw = lgetxattr(fullpath,'user.mergerfs.basepath')
    return raw.decode(errors='backslashreplace')
def ismergerfs(path):
    """Return True when `path` carries the 'user.mergerfs.version' xattr.

    lgetxattr() reports a missing attribute (ENODATA) by returning None
    rather than raising, so the value itself has to be checked; otherwise
    any ordinary file without the attribute would be accepted. Every other
    lookup failure surfaces as IOError and also yields False.
    """
    try:
        return lgetxattr(path,'user.mergerfs.version') is not None
    except IOError:
        return False
def mergerfs_control_file(basedir):
    """Find the nearest '.mergerfs' entry at or above `basedir`.

    Walks from `basedir` towards the filesystem root and returns the first
    existing '<dir>/.mergerfs' path. Returns None once '/' is reached ('/'
    itself is not probed) or when the path cannot be shortened any further,
    which also terminates the search for empty or relative paths instead of
    recursing without bound.
    """
    while basedir != '/':
        ctrlfile = os.path.join(basedir,'.mergerfs')
        if os.path.exists(ctrlfile):
            return ctrlfile
        parent = os.path.dirname(basedir)
        if parent == basedir:
            # dirname() reached a fixed point ('' or '//'): nothing left.
            return None
        basedir = parent
    return None
def mergerfs_srcmounts(ctrlfile):
    """Return the 'user.mergerfs.srcmounts' xattr of `ctrlfile`, decoded and split on ':'."""
    raw = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
    text = raw.decode(errors='backslashreplace')
    return text.split(':')
def match(filename,matches):
    """Return True if `filename` matches any fnmatch pattern in `matches`."""
    return any(fnmatch.fnmatch(filename,pattern) for pattern in matches)
def execute_cmd(args):
    """Run the command `args` with subprocess.call and return its exit status."""
    status = subprocess.call(args)
    return status
def print_args(args):
    """Print `args` as one shell-quoted, space-separated command line."""
    print(' '.join(map(shlex.quote,args)))
def human_to_bytes(s):
    """Convert '<int>' or '<int>[KMGT]' to an integer number of bytes.

    The suffixes K, M, G and T multiply by successive powers of 1024; any
    other final character means the whole string is parsed as an integer.
    """
    multipliers = {'K': 1024,
                   'M': 1024 * 1024,
                   'G': 1024 * 1024 * 1024,
                   'T': 1024 * 1024 * 1024 * 1024}
    suffix = s[-1]
    if suffix in multipliers:
        return int(s[0:-1]) * multipliers[suffix]
    return int(s)
def get_stats(branches):
    """Map each branch path to f_bavail * f_frsize as reported by os.statvfs."""
    def available(branch):
        vfs = os.statvfs(branch)
        return vfs.f_bavail * vfs.f_frsize

    return {branch: available(branch) for branch in branches}
def build_move_file(src,tgt,rel):
    """Build the rsync argument list that moves `rel` from `src` to `tgt`.

    Slashes are stripped from both ends of `rel`; the source is spelled
    '<src>/./<rel>' and passed together with --relative. The target is
    `tgt` normalised to end in exactly one added '/'.
    --remove-source-files is included in the command.
    """
    source = os.path.join(src,'./',rel.strip('/'))
    target = tgt.rstrip('/') + '/'
    command = ['rsync',
               '-avHAXWE',
               '--numeric-ids',
               '--progress',
               '--relative',
               '--remove-source-files']
    command.extend([source,target])
    return command
def print_help():
    """Print the usage text of mergerfs.consolidate to stdout."""
    usage_text = \
'''
usage: mergerfs.consolidate [<options>] <dir>
Consolidate files in a single mergerfs directory onto a single drive.
positional arguments:
dir starting directory
optional arguments:
-m, --max-files= Skip directories with more than N files.
(default: 256)
-M, --max-size= Skip directories with files adding up to more
than N. (default: 16G)
-I, --include-path= fnmatch compatible path include filter.
Can be used multiple times.
-E, --exclude-path= fnmatch compatible path exclude filter.
Can be used multiple times.
-e, --execute Execute `rsync` commands as well as print them.
-h, --help Print this help.
'''
    print(usage_text)
def buildargparser():
    """Create the command-line parser for mergerfs.consolidate.

    argparse's built-in help is disabled (add_help=False); -h/--help is
    declared as a plain flag instead. `dir` is optional and defaults to
    None. --max-size values are converted with human_to_bytes.
    """
    parser = argparse.ArgumentParser(add_help=False)
    add = parser.add_argument

    add('dir',
        type=str,
        nargs='?',
        default=None)
    add('-m','--max-files',
        dest='max_files',
        type=int,
        default=256)
    add('-M','--max-size',
        dest='max_size',
        type=human_to_bytes,
        default='16G')

    # Repeatable path filters share everything but flags and destination.
    for (short,long_,dest) in (('-I','--include-path','includepath'),
                               ('-E','--exclude-path','excludepath')):
        add(short,long_,
            dest=dest,
            type=str,
            action='append',
            default=[])

    add('-e','--execute',
        dest='execute',
        action='store_true')
    add('-h','--help',
        action='store_true')
    return parser
def main():
    """Entry point of mergerfs.consolidate.

    For every directory under the starting directory that holds more than
    one and at most --max-files entries, passes the path filters and whose
    regular files total less than --max-size, prints (and with --execute
    runs) one rsync command per file to move it onto the branch currently
    tracked as having the most available space. Exits with status 1 when
    the directory is not on a mergerfs mount, otherwise with status 0.
    """
    def utf8_wrap(stream):
        # Re-wrap a standard stream so unencodable characters are escaped.
        return io.TextIOWrapper(stream.buffer,
                                encoding='utf8',
                                errors='backslashreplace',
                                line_buffering=True)

    sys.stdout = utf8_wrap(sys.stdout)
    sys.stderr = utf8_wrap(sys.stderr)

    opts = buildargparser().parse_args()
    if opts.help or not opts.dir:
        print_help()
        sys.exit(0)

    opts.dir = os.path.realpath(opts.dir)
    ctrlfile = mergerfs_control_file(opts.dir)
    if not ismergerfs(ctrlfile):
        print("%s is not a mergerfs mount" % opts.dir)
        sys.exit(1)

    basedir = opts.dir
    execute = opts.execute
    max_files = opts.max_files
    max_size = opts.max_size
    path_includes = opts.includepath if opts.includepath else ['*']
    path_excludes = opts.excludepath
    srcmounts = mergerfs_srcmounts(ctrlfile)
    mount_stats = get_stats(srcmounts)

    try:
        for (root,_dirs,files) in os.walk(basedir):
            if not (1 < len(files) <= max_files):
                continue
            if match(root,path_excludes) or not match(root,path_includes):
                continue

            # Collect the regular files of this directory and their sizes.
            total_size = 0
            file_stats = {}
            for name in files:
                fullpath = os.path.join(root,name)
                st = os.lstat(fullpath)
                if stat.S_ISREG(st.st_mode):
                    total_size += st.st_size
                    file_stats[fullpath] = st
            if total_size >= max_size:
                continue

            # Target is the branch with the largest tracked free space.
            by_space = sorted(mount_stats.items(),key=lambda x: x[1],reverse=True)
            tgtpath = by_space[0][0]

            for (fullpath,st) in sorted(file_stats.items()):
                srcpath = xattr_basepath(fullpath)
                if srcpath == tgtpath:
                    continue
                relpath = xattr_relpath(fullpath)
                # Update the bookkeeping as if the move has taken place.
                mount_stats[srcpath] += st.st_size
                mount_stats[tgtpath] -= st.st_size
                command = build_move_file(srcpath,tgtpath,relpath)
                print_args(command)
                if execute:
                    execute_cmd(command)
    except (KeyboardInterrupt,BrokenPipeError):
        pass

    sys.exit(0)
if __name__ == "__main__":
main()
07070100000007000081ED00000000000000000000000164FFAFB70000202D000000000000000000000000000000000000004000000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/src/mergerfs.ctl#!/usr/bin/env python3
# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import argparse
import os
import sys
def find_mergerfs():
    """Return the mount points of all mergerfs filesystems.

    Parses /proc/self/mountinfo and keeps entries whose filesystem type is
    'fuse.mergerfs' and whose mount root is '/'. Only the '\\ooo' octal
    escapes in the mount point field are decoded. The previous
    `.encode().decode('unicode_escape')` round trip reinterpreted the UTF-8
    bytes as Latin-1 and so corrupted any non-ASCII mount point.
    """
    import re

    def unescape(field):
        # Replace each backslash + three octal digits with its character.
        return re.sub(r'\\([0-7]{3})',
                      lambda m: chr(int(m.group(1),8)),
                      field)

    rv = []
    with open('/proc/self/mountinfo','r') as f:
        for line in f:
            values = line.split()
            mountroot, mountpoint = values[3:5]
            # A lone '-' separates the optional fields from the fs type.
            separator = values.index('-', 6)
            fstype = values[separator + 1]
            if fstype == 'fuse.mergerfs' and mountroot == '/':
                rv.append(unescape(mountpoint))
    return rv
def ask_about_path(paths):
    """Prompt for an index into `paths` and return the selected entry.

    The prompt lists every path with its index; the reply is converted
    with int() and used to index `paths`.
    """
    parts = ['Available mergerfs mounts:\n']
    parts.extend(' {0}: {1}\n'.format(index,path)
                 for (index,path) in enumerate(paths))
    parts.append('Choose which mount to act on: ')
    choice = input(''.join(parts))
    return paths[int(choice)]
def device2mount(device):
    """Return the mount point recorded for `device`, or None.

    /proc/mounts is searched first for a line whose first column equals
    `device`. /etc/fstab is searched next, comparing the first column both
    literally and after os.path.realpath(). Blank or one-column lines are
    skipped in both files, and an unreadable /etc/fstab is treated as
    "no match" instead of raising.
    """
    with open('/proc/mounts','r') as f:
        for line in f:
            columns = line.split()
            if len(columns) >= 2 and columns[0] == device:
                return columns[1]

    try:
        fstab = open('/etc/fstab','r')
    except OSError:
        return None

    with fstab:
        for line in fstab:
            columns = line.split()
            if len(columns) < 2 or columns[0].startswith('#'):
                continue
            if columns[0] == device:
                return columns[1]
            try:
                realpath = os.path.realpath(columns[0])
            except (OSError,ValueError):
                # Unresolvable spec: skip the line, keep scanning.
                continue
            if realpath == device:
                return columns[1]
    return None
def control_file(path):
    """Return the path of the '.mergerfs' entry directly inside `path`."""
    ctrlfile = os.path.join(path,'.mergerfs')
    return ctrlfile
def add_srcmount(ctrlfile,srcmount):
    """Write '+<srcmount>' to the 'user.mergerfs.branches' xattr of `ctrlfile`.

    Any exception raised by os.setxattr is printed rather than propagated.
    """
    request = b'+' + srcmount.encode()
    try:
        os.setxattr(ctrlfile,b'user.mergerfs.branches',request)
    except Exception as err:
        print(err)
def remove_srcmount(ctrlfile,srcmount):
    """Write '-<srcmount>' to the 'user.mergerfs.branches' xattr of `ctrlfile`.

    Any exception raised by os.setxattr is printed rather than propagated.
    """
    request = b'-' + srcmount.encode()
    try:
        os.setxattr(ctrlfile,b'user.mergerfs.branches',request)
    except Exception as err:
        print(err)
def normalize_key(key):
    """Ensure `key` starts with the 'user.mergerfs.' prefix.

    Works for both str and bytes keys, returning the same type. Keys that
    already carry the prefix are returned unchanged. Any other type yields
    None.
    """
    if type(key) is bytes:
        prefix = b'user.mergerfs.'
    elif type(key) is str:
        prefix = 'user.mergerfs.'
    else:
        return None
    return key if key.startswith(prefix) else prefix + key
def print_mergerfs_info(fspaths):
    """Print mount path, version, pid and srcmounts for each mount in `fspaths`.

    The values are read from the 'user.mergerfs.version', '.pid' and
    '.srcmounts' xattrs of each mount's control file.
    """
    header = ('- mount: {0}\n'
              ' version: {1}\n'
              ' pid: {2}\n'
              ' srcmounts:\n'
              ' - ')
    for fspath in fspaths:
        ctrlfile = control_file(fspath)
        raw_version = os.getxattr(ctrlfile,'user.mergerfs.version')
        raw_pid = os.getxattr(ctrlfile,'user.mergerfs.pid')
        raw_srcmounts = os.getxattr(ctrlfile,'user.mergerfs.srcmounts')

        text = header.format(fspath,
                             raw_version.decode(),
                             raw_pid.decode())
        branches = raw_srcmounts.decode().split(':')
        text += '\n - '.join(branches)
        print(text)
def build_arg_parser():
    """Create the mergerfs.ctl parser with its sub-commands.

    Sub-commands: add, remove, list, get, set and info. Each one stores
    its handler function in the `func` attribute of the parsed namespace.
    The global -m/--mount option is also defined here.
    """
    parser = argparse.ArgumentParser(description='a tool for runtime manipulation of mergerfs')
    subparsers = parser.add_subparsers(dest='command')
    parser.add_argument('-m','--mount',
                        type=str,
                        help='mergerfs mount to act on')

    # add/remove take the same two positionals and differ only in handler.
    for (name,handler) in (('add',cmd_add),('remove',cmd_remove)):
        branch_cmd = subparsers.add_parser(name)
        branch_cmd.add_argument('type',choices=['path','device'])
        branch_cmd.add_argument('path',type=str)
        branch_cmd.set_defaults(func=handler)

    list_cmd = subparsers.add_parser('list')
    list_cmd.add_argument('type',choices=['options','values'])
    list_cmd.set_defaults(func=cmd_list)

    get_cmd = subparsers.add_parser('get')
    get_cmd.add_argument('option',type=str,nargs='+')
    get_cmd.set_defaults(func=cmd_get)

    set_cmd = subparsers.add_parser('set')
    set_cmd.add_argument('option',type=str)
    set_cmd.add_argument('value',type=str)
    set_cmd.set_defaults(func=cmd_set)

    info_cmd = subparsers.add_parser('info')
    info_cmd.set_defaults(func=cmd_info)

    return parser
def cmd_add(fspaths,args):
    """Dispatch the 'add' sub-command on args.type ('device' or 'path')."""
    handlers = {'device': cmd_add_device,
                'path': cmd_add_path}
    handler = handlers.get(args.type)
    if handler is None:
        return None
    return handler(fspaths,args)
def cmd_add_device(fspaths,args):
    """Add the mount point of device args.path as a branch of every mount.

    Prints '<device> not found' when the device cannot be resolved to a
    mount point.
    """
    for ctrlfile in map(control_file,fspaths):
        mount = device2mount(args.path)
        if not mount:
            print('{0} not found'.format(args.path))
            continue
        add_srcmount(ctrlfile,mount)
def cmd_add_path(fspaths,args):
    """Add args.path as a branch of every mount in `fspaths`."""
    for ctrlfile in map(control_file,fspaths):
        add_srcmount(ctrlfile,args.path)
def cmd_remove(fspaths,args):
    """Dispatch the 'remove' sub-command on args.type ('device' or 'path')."""
    handlers = {'device': cmd_remove_device,
                'path': cmd_remove_path}
    handler = handlers.get(args.type)
    if handler is None:
        return None
    return handler(fspaths,args)
def cmd_remove_device(fspaths,args):
    """Remove the mount point of device args.path from the source mounts.

    For every mergerfs mount in `fspaths` the device is resolved with
    device2mount(); a message is printed when it cannot be resolved.
    """
    for fspath in fspaths:
        ctrlfile = control_file(fspath)
        mount = device2mount(args.path)
        if mount:
            remove_srcmount(ctrlfile,mount)
        else:
            # args.path comes from argparse (type=str); it has no
            # .decode(), so format it directly as cmd_add_device does.
            print('{0} not found'.format(args.path))
def cmd_remove_path(fspaths,args):
    """Remove args.path from the source mounts of every mount in `fspaths`."""
    for mountpoint in fspaths:
        remove_srcmount(control_file(mountpoint),args.path)
def cmd_list(fspaths,args):
    """Dispatch the 'list' subcommand according to args.type.

    'values' prints option names with their values, 'options' prints
    names only. Returns None for any other type.
    """
    kind = args.type
    if kind == 'values':
        return cmd_list_values(fspaths,args)
    elif kind == 'options':
        return cmd_list_options(fspaths,args)
    return None
def cmd_list_options(fspaths,args):
    """Print the xattr names exposed by each mount's control file."""
    for mountpoint in fspaths:
        ctrl = control_file(mountpoint)
        names = os.listxattr(ctrl)
        chunks = [('- mount: {0}\n'
                   ' options:\n').format(mountpoint)]
        chunks.extend(' - {0}\n'.format(name) for name in names)
        # Each chunk already ends with a newline.
        print(''.join(chunks),end='')
def cmd_list_values(fspaths,args):
    """Print every xattr name and its decoded value for each mount."""
    for mountpoint in fspaths:
        ctrl = control_file(mountpoint)
        names = os.listxattr(ctrl)
        chunks = [('- mount: {0}\n'
                   ' options:\n').format(mountpoint)]
        for name in names:
            raw = os.getxattr(ctrl,name)
            chunks.append(' {0}: {1}\n'.format(name,raw.decode()))
        # Each chunk already ends with a newline.
        print(''.join(chunks),end='')
def cmd_get(fspaths,args):
    """Print the value of each requested option for each mount.

    Option names in args.option are passed through normalize_key()
    before being read from the control file.
    """
    for mountpoint in fspaths:
        ctrl = control_file(mountpoint)
        print('- mount: {0}'.format(mountpoint))
        for requested in args.option:
            xattr_name = normalize_key(requested)
            text = os.getxattr(ctrl,xattr_name).decode()
            print(' {0}: {1}'.format(xattr_name,text))
def cmd_set(fspaths,args):
    """Write args.value to option args.option on every mount.

    Failures from os.setxattr() are printed and do not stop the loop.
    """
    for mountpoint in fspaths:
        ctrl = control_file(mountpoint)
        xattr_name = normalize_key(args.option)
        payload = args.value.encode()
        try:
            os.setxattr(ctrl,xattr_name,payload)
        except Exception as err:
            print(err)
def cmd_info(fspaths,args):
    """Handle the 'info' subcommand by printing details of each mount.

    `args` is part of the common handler signature and is not used.
    """
    print_mergerfs_info(fspaths)
def print_and_exit(string,rv):
    """Print `string` and terminate the process with exit status `rv`."""
    print(string)
    raise SystemExit(rv)
def main():
    """Entry point: parse arguments, select mounts, run the subcommand.

    With -m/--mount the given path must be one of the detected mergerfs
    mounts; without it all detected mounts are used and at least one
    must exist. Prints help when no subcommand was given.
    """
    parser = build_arg_parser()
    args = parser.parse_args()

    fspaths = find_mergerfs()
    if args.mount:
        if args.mount not in fspaths:
            print_and_exit('{0} is not a mergerfs mount'.format(args.mount),1)
        else:
            fspaths = [args.mount]
    elif not fspaths:
        print_and_exit('no mergerfs mounts found',1)

    # 'func' is only present when a subcommand was selected.
    if not hasattr(args,'func'):
        parser.print_help()
    else:
        args.func(fspaths,args)

    sys.exit(0)
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()
07070100000008000081ED00000000000000000000000164FFAFB7000040E3000000000000000000000000000000000000004200000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/src/mergerfs.dedup#!/usr/bin/env python3
# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import argparse
import ctypes
import errno
import fnmatch
import hashlib
import io
import os
import random
import shlex
import sys
# Bind libc's lgetxattr through ctypes. use_errno=True lets callers read
# the failure reason with ctypes.get_errno().
_libc = ctypes.CDLL("libc.so.6",use_errno=True)
_lgetxattr = _libc.lgetxattr
# Argument types in call order: path, attribute name, value buffer, buffer size.
_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
def lgetxattr(path,name):
    """Read extended attribute `name` of `path` via libc's lgetxattr.

    str arguments are encoded with errors='backslashreplace'. Returns
    the raw value as bytes, or None when the call fails with ENODATA.
    Raises IOError for any other failure except ERANGE, which is
    handled by retrying with a doubled buffer.
    """
    if type(path) == str:
        path = path.encode(errors='backslashreplace')
    if type(name) == str:
        name = name.encode(errors='backslashreplace')

    size = 64
    while True:
        buf = ctypes.create_string_buffer(size)
        got = _lgetxattr(path,name,buf,ctypes.c_size_t(size))
        if got >= 0:
            return buf.raw[0:got]

        err = ctypes.get_errno()
        if err == errno.ENODATA:
            return None
        if err != errno.ERANGE:
            raise IOError(err,os.strerror(err),path)
        # ERANGE: buffer too small, try again with twice the space.
        size *= 2
def ismergerfs(path):
    """Return True when `path` carries the 'user.mergerfs.fullpath' xattr.

    lgetxattr() reports a missing attribute (ENODATA) by returning None
    instead of raising, so the result is checked explicitly; otherwise
    any path on an xattr-capable filesystem would be accepted. Any
    IOError from the lookup is treated as "not mergerfs".
    """
    try:
        return lgetxattr(path,b'user.mergerfs.fullpath') is not None
    except IOError:
        return False
def hash_file(filepath, hasher=None, blocksize=65536):
    """Return the hex digest of the whole file at `filepath`.

    The file is read in `blocksize` chunks and fed to `hasher`
    (an MD5 object is created when none is supplied).
    """
    digest = hasher if hasher else hashlib.md5()

    with open(filepath,'rb') as stream:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: stream.read(blocksize),b''):
            digest.update(chunk)

    return digest.hexdigest()
def short_hash_file(filepath, hasher=None, blocksize=65536, blocks=16):
    """Return a hex digest computed from a sample of the file's blocks.

    Up to `blocks` reads of `blocksize` bytes are taken at offsets drawn
    from the `random` module after seeding it with the file size, so
    files of equal size are sampled at the same offsets. Files no larger
    than `blocksize` are read once from offset 0. Note that this reseeds
    the module-level random generator.
    """
    digest = hasher if hasher else hashlib.md5()

    with open(filepath,'rb') as stream:
        size = os.fstat(stream.fileno()).st_size
        if size <= blocksize:
            # Small file: a single read from the start covers it.
            size,blocks = 1,1

        random.seed(size,version=2)
        remaining = blocks
        while remaining > 0:
            stream.seek(random.randrange(size))
            chunk = stream.read(blocksize)
            if not chunk:
                break
            digest.update(chunk)
            remaining -= 1

    return digest.hexdigest()
def sizeof_fmt(num):
    """Format a byte count with a binary unit prefix, e.g. '1.5KB'.

    The value is divided by 1024 until its magnitude drops below 1024;
    anything beyond the 'Z' prefix is reported with 'Y'.
    """
    prefixes = ['','K','M','G','T','P','E','Z']
    index = 0
    while index < len(prefixes):
        if abs(num) < 1024.0:
            return "%3.1f%sB" % (num,prefixes[index])
        num /= 1024.0
        index += 1
    return "%.1f%sB" % (num,'Y')
def stat_files(paths):
    """Return a list of (path, os.stat_result) for each path that can be stat'ed.

    Paths that cannot be stat'ed are skipped on purpose. Only OSError
    (missing file, permissions, ...) and ValueError (malformed path) are
    swallowed, so KeyboardInterrupt and SystemExit still propagate.
    """
    rv = []
    for path in paths:
        try:
            rv.append((path,os.stat(path)))
        except (OSError,ValueError):
            # Best effort: an unreadable path is left out of the result.
            continue
    return rv
def remove(files,execute,verbose):
    """Print an `rm -vf` line for each (path, stat) pair in `files`.

    The file is actually deleted only when `execute` is true. Errors are
    printed and do not stop the loop. `verbose` is not used.
    """
    for (target,_) in files:
        try:
            print('rm -vf',shlex.quote(target))
            if not execute:
                continue
            os.remove(target)
        except Exception as err:
            print("%s" % err)
def print_stats(stats):
    """Print a numbered two-line summary for each (path, stat) entry.

    Numbering starts at 1 so it matches what manual_dedup() asks for.
    """
    template = ("# - uid: {0:5}; gid: {1:5}; mode: {2:6o}; "
                "size: {3}; mtime: {4}")
    for (number,entry) in enumerate(stats,1):
        st = entry[1]
        print("# %i: %s" % (number,entry[0]))
        print(template.format(st.st_uid,
                              st.st_gid,
                              st.st_mode,
                              sizeof_fmt(st.st_size),
                              st.st_mtime))
def total_size(stats):
    """Return the sum of st_size over all (name, stat) pairs in `stats`."""
    return sum(st.st_size for (_,st) in stats)
def manual_dedup(fullpath,stats):
    """Ask the user which copy to keep and drop it from `stats`.

    `stats` is edited in place: the chosen entry (1-based) is removed so
    that only the entries to delete remain. Entering 's' empties the
    list, which skips the file. Invalid input prompts again.
    """
    while True:
        answer = input("# Which to keep? ('s' to skip):")
        if answer.lower() == 's':
            stats.clear()
            return

        try:
            index = int(answer) - 1
            if not (0 <= index < len(stats)):
                raise ValueError
            stats.remove(stats[index])
            return
        except (NameError,ValueError):
            print("Input error: enter a value [1-{0}] or skip by entering 's'".format(len(stats)))
def mtime_all(stats):
    """Return True when every entry has the same st_mtime as the first."""
    reference = stats[0][1].st_mtime
    for entry in stats:
        if entry[1].st_mtime != reference:
            return False
    return True
def mtime_any(mtime,stats):
    """Return True when at least one entry's st_mtime equals `mtime`."""
    for (_,st) in stats:
        if st.st_mtime == mtime:
            return True
    return False
def size_all(stats):
    """Return True when every entry has the same st_size as the first."""
    reference = stats[0][1].st_size
    for entry in stats:
        if entry[1].st_size != reference:
            return False
    return True
def size_any(size,stats):
    """Return True when at least one entry's st_size equals `size`."""
    for (_,st) in stats:
        if st.st_size == size:
            return True
    return False
def md5sums_all(stats):
    """Return True when all files have the same size and full-file hash.

    Hashing is skipped when the sizes already differ, and stops at the
    first file whose hash does not match the first entry's.
    """
    if not size_all(stats):
        return False

    expected = hash_file(stats[0][0])
    for (path,_) in stats[1:]:
        if hash_file(path) != expected:
            return False
    return True
def short_md5sums_all(stats):
    """Return True when all files have the same size and sampled hash.

    Hashing is skipped when the sizes already differ, and stops at the
    first file whose sampled hash does not match the first entry's.
    """
    if not size_all(stats):
        return False

    expected = short_hash_file(stats[0][0])
    for (path,_) in stats[1:]:
        if short_hash_file(path) != expected:
            return False
    return True
def oldest_dedup(fullpath,stats):
    """Keep the copy with the smallest mtime; leave the rest in `stats`.

    When all copies share size and mtime the choice is delegated to
    drive_with_most_space_dedup(). `stats` is modified in place.
    """
    identical = size_all(stats) and mtime_all(stats)
    if identical:
        drive_with_most_space_dedup(fullpath,stats)
    else:
        stats.sort(key=lambda entry: entry[1].st_mtime)
        # The first entry is the one to keep, so take it off the list.
        del stats[0]
def strict_oldest_dedup(fullpath,stats):
    """Keep the oldest copy, or skip when the oldest mtime is not unique.

    `stats` is modified in place; it is emptied when another copy has
    the same mtime as the oldest one.
    """
    stats.sort(key=lambda entry: entry[1].st_mtime)
    keep = stats[0]
    del stats[0]
    if mtime_any(keep[1].st_mtime,stats):
        stats.clear()
def newest_dedup(fullpath,stats):
    """Keep the copy with the largest mtime; leave the rest in `stats`.

    When all copies share size and mtime the choice is delegated to
    drive_with_most_space_dedup(). `stats` is modified in place.
    """
    identical = size_all(stats) and mtime_all(stats)
    if identical:
        drive_with_most_space_dedup(fullpath,stats)
    else:
        stats.sort(key=lambda entry: entry[1].st_mtime,reverse=True)
        # The first entry is the one to keep, so take it off the list.
        del stats[0]
def strict_newest_dedup(fullpath,stats):
    """Keep the newest copy, or skip when the newest mtime is not unique.

    `stats` is modified in place; it is emptied when another copy has
    the same mtime as the newest one.
    """
    stats.sort(key=lambda entry: entry[1].st_mtime,reverse=True)
    keep = stats[0]
    del stats[0]
    if mtime_any(keep[1].st_mtime,stats):
        stats.clear()
def largest_dedup(fullpath,stats):
    """Keep the copy with the largest size; leave the rest in `stats`.

    When all copies share size and mtime the choice is delegated to
    drive_with_most_space_dedup(). `stats` is modified in place.
    """
    identical = size_all(stats) and mtime_all(stats)
    if identical:
        drive_with_most_space_dedup(fullpath,stats)
    else:
        stats.sort(key=lambda entry: entry[1].st_size,reverse=True)
        # The first entry is the one to keep, so take it off the list.
        del stats[0]
def strict_largest_dedup(fullpath,stats):
    """Keep the largest copy, or skip when the largest size is not unique.

    `stats` is modified in place; it is emptied when another copy has
    the same size as the largest one.
    """
    stats.sort(key=lambda entry: entry[1].st_size,reverse=True)
    keep = stats[0]
    del stats[0]
    if size_any(keep[1].st_size,stats):
        stats.clear()
def smallest_dedup(fullpath,stats):
    """Keep the copy with the smallest size; leave the rest in `stats`.

    When all copies share size and mtime the choice is delegated to
    drive_with_most_space_dedup(). `stats` is modified in place.
    """
    identical = size_all(stats) and mtime_all(stats)
    if identical:
        drive_with_most_space_dedup(fullpath,stats)
    else:
        stats.sort(key=lambda entry: entry[1].st_size)
        # The first entry is the one to keep, so take it off the list.
        del stats[0]
def strict_smallest_dedup(fullpath,stats):
    """Keep the smallest copy, or skip when the smallest size is not unique.

    `stats` is modified in place; it is emptied when another copy has
    the same size as the smallest one.
    """
    stats.sort(key=lambda entry: entry[1].st_size)
    keep = stats[0]
    del stats[0]
    if size_any(keep[1].st_size,stats):
        stats.clear()
def calc_space_free(stat):
    """Return free bytes (f_frsize * f_bfree) on the filesystem of stat[0].

    `stat` is a (path, stat_result) pair; only the path is used.
    """
    vfs = os.statvfs(stat[0])
    free_bytes = vfs.f_frsize * vfs.f_bfree
    return free_bytes
def drive_with_most_space_dedup(fullpath,stats):
    """Keep the copy on the filesystem with the most free space.

    `stats` is sorted by calc_space_free() in descending order and its
    first entry (the copy to keep) is removed in place.
    """
    stats.sort(key=calc_space_free,reverse=True)
    keep = stats[0]
    del stats[0]
def mergerfs_getattr_dedup(origpath,stats):
    """Keep the copy that the 'user.mergerfs.fullpath' xattr points to.

    The first entry of `stats` whose path equals that xattr value is
    removed in place; `stats` is left untouched when none matches.
    """
    chosen = getxattr(origpath,b'user.mergerfs.fullpath')
    for (index,entry) in enumerate(stats):
        if entry[0] == chosen:
            del stats[index]
            break
def get_dedupfun(name,strict):
    """Return the dedup policy function for `name`.

    With `strict` set the 'strict-' variant is looked up; policies
    without a dedicated strict version map to the same function.
    Raises KeyError for an unknown name.
    """
    key = ('strict-' + name) if strict else name

    # policy name -> (normal implementation, strict implementation)
    policies = (
        ('manual',manual_dedup,manual_dedup),
        ('mostfreespace',drive_with_most_space_dedup,drive_with_most_space_dedup),
        ('newest',newest_dedup,strict_newest_dedup),
        ('oldest',oldest_dedup,strict_oldest_dedup),
        ('largest',largest_dedup,strict_largest_dedup),
        ('smallest',smallest_dedup,strict_smallest_dedup),
        ('mergerfs',mergerfs_getattr_dedup,mergerfs_getattr_dedup),
    )
    funs = {}
    for (policy,normal,strict_variant) in policies:
        funs[policy] = normal
        funs['strict-' + policy] = strict_variant

    return funs[key]
def get_ignorefun(name):
    """Return the predicate deciding whether a set of copies is ignored.

    `name` is one of the --ignore choices or None. The None predicate
    always returns None, so nothing is ignored. Raises KeyError for an
    unknown name.
    """
    def negate(predicate):
        return lambda x: not predicate(x)

    funs = {None: lambda x: None}
    for (suffix,predicate) in (('time',mtime_all),
                               ('size',size_all),
                               ('hash',md5sums_all),
                               ('short-hash',short_md5sums_all)):
        funs['same-' + suffix] = predicate
        funs['diff-' + suffix] = negate(predicate)

    return funs[name]
def getxattr(path,key):
    """Return xattr `key` of `path` decoded as UTF-8, or '' when unavailable.

    '' is returned when the attribute is missing or empty, and also
    (after printing the error and the raw value) when it is not valid
    UTF-8. IOErrors other than ENODATA are re-raised.
    """
    try:
        raw = lgetxattr(path,key)
    except IOError as err:
        if err.errno != errno.ENODATA:
            raise
        return ''

    if not raw:
        return ''

    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError as err:
        print(err)
        print(raw)
        return ''
def match(filename,matches):
    """Return True when `filename` matches any fnmatch pattern in `matches`."""
    return any(fnmatch.fnmatch(filename,pattern) for pattern in matches)
def dedup(fullpath,verbose,ignorefun,execute,dedupfun):
    """Deduplicate one pool file and return the bytes that removal frees.

    The copies are taken from the 'user.mergerfs.allpaths' xattr.
    `ignorefun` may veto the file; `dedupfun` trims the stat list down
    to the copies to delete. `rm` lines are printed when `verbose` is
    set, and files are removed only when `execute` is true. Returns 0
    when nothing is to be done or when an error occurs.
    """
    allpaths = getxattr(fullpath,b'user.mergerfs.allpaths').split('\0')
    if len(allpaths) <= 1:
        return 0

    stats = stat_files(allpaths)

    if ignorefun(stats):
        if verbose >= 2:
            print('# ignored:',fullpath)
        return 0

    interactive = (dedupfun == manual_dedup)
    if interactive:
        # The user needs to see the candidates before being prompted.
        print('#',fullpath)
        print_stats(stats)

    try:
        dedupfun(fullpath,stats)

        if not stats:
            if verbose >= 2:
                print('# skipped:',fullpath)
            return 0

        if not interactive:
            if verbose >= 2:
                print('#',fullpath)
            if verbose >= 3:
                print_stats(stats)

        for (victim,_) in stats:
            try:
                if verbose:
                    print('rm -vf',shlex.quote(victim))
                if execute:
                    os.remove(victim)
            except Exception as err:
                print('#',err)

        return total_size(stats)
    except Exception as err:
        print(err)

    return 0
def print_help():
    """Print the hand-written usage text for mergerfs.dedup."""
    usage_text = '''
usage: mergerfs.dedup [<options>] <dir>
Remove duplicate files across branches of a mergerfs pool. Provides
multiple algos for determining which file to keep and what to skip.
positional arguments:
dir Starting directory
optional arguments:
-v, --verbose Once to print `rm` commands
Twice for status info
Three for file info
-i, --ignore= Ignore files if... (default: none)
* same-size : have the same size
* diff-size : have different sizes
* same-time : have the same mtime
* diff-time : have different mtimes
* same-hash : have the same md5sum
* diff-hash : have different md5sums
* same-short-hash : have the same short md5sums
* diff-short-hash : have different short md5sums
'hash' is expensive. 'short-hash' far less
expensive, not as safe, but pretty good.
-d, --dedup= What file to *keep* (default: mergerfs)
* manual : ask user
* oldest : file with smallest mtime
* newest : file with largest mtime
* largest : file with largest size
* smallest : file with smallest size
* mostfreespace : file on drive with most free space
* mergerfs : file selected by the mergerfs
getattr policy
-s, --strict Skip dedup if all files have same (mtime,size) value.
Only applies to oldest, newest, largest, smallest.
-e, --execute Will not perform file removal without this.
-I, --include= fnmatch compatible filter to include files.
Can be used multiple times.
-E, --exclude= fnmatch compatible filter to exclude files.
Can be used multiple times.
-D, --exclude-dir= Directories to exclude from search.
Can be used multiple times.
'''
    print(usage_text)
def buildargparser():
    """Build the argparse parser for mergerfs.dedup.

    Built-in help is disabled; -h/--help is a plain flag so the caller
    can print its own usage text.
    """
    parser = argparse.ArgumentParser(add_help=False)
    add = parser.add_argument

    add('dir',type=str,nargs='?',default=None,help='starting directory')
    add('-v','--verbose',action='count',default=0)
    add('-i','--ignore',
        choices=['same-size','diff-size',
                 'same-time','diff-time',
                 'same-hash','diff-hash',
                 'same-short-hash',
                 'diff-short-hash'])
    add('-d','--dedup',
        choices=['manual',
                 'oldest','newest',
                 'smallest','largest',
                 'mostfreespace',
                 'mergerfs'],
        default='mergerfs')
    add('-s','--strict',action='store_true')
    add('-e','--execute',action='store_true')
    add('-I','--include',type=str,action='append',default=[])
    add('-E','--exclude',type=str,action='append',default=[])
    add('-D','--exclude-dir',dest='excludedir',type=str,action='append',default=[])
    add('-h','--help',action='store_true')

    return parser
def main():
    """Entry point of mergerfs.dedup.

    Walks the given mergerfs directory, applies the include/exclude
    filters, runs dedup() on every remaining file and prints the total
    size of the copies selected for removal.
    """
    # Re-wrap stdout/stderr so unencodable file names are printed with
    # backslash escapes.
    for stream_name in ('stdout','stderr'):
        current = getattr(sys,stream_name)
        setattr(sys,stream_name,io.TextIOWrapper(current.buffer,
                                                 encoding='utf8',
                                                 errors='backslashreplace',
                                                 line_buffering=True))

    args = buildargparser().parse_args()
    if args.help or not args.dir:
        print_help()
        sys.exit(0)

    args.dir = os.path.realpath(args.dir)
    if not ismergerfs(args.dir):
        print("%s is not a mergerfs directory" % args.dir)
        sys.exit(1)

    dedupfun = get_dedupfun(args.dedup,args.strict)
    ignorefun = get_ignorefun(args.ignore)
    verbose = args.verbose
    execute = args.execute
    includes = args.include if args.include else ['*']
    excludes = args.exclude

    saved = 0
    try:
        for (dirname,dirnames,filenames) in os.walk(args.dir, topdown=True):
            # Prune in place so os.walk skips the excluded directories.
            dirnames[:] = [d for d in dirnames if d not in args.excludedir]
            for filename in filenames:
                if match(filename,excludes) or not match(filename,includes):
                    continue
                fullpath = os.path.join(dirname,filename)
                saved += dedup(fullpath,verbose,ignorefun,execute,dedupfun)
    except KeyboardInterrupt:
        print("# exiting: CTRL-C pressed")
    except IOError as err:
        if err.errno != errno.EPIPE:
            raise

    print('# Total savings:',sizeof_fmt(saved))
    sys.exit(0)
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()
07070100000009000081ED00000000000000000000000164FFAFB70000335B000000000000000000000000000000000000004000000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/src/mergerfs.dup#!/usr/bin/env python3
# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import argparse
import ctypes
import errno
import fnmatch
import io
import os
import shlex
import subprocess
import sys
# Bind libc's lgetxattr through ctypes. use_errno=True lets callers read
# the failure reason with ctypes.get_errno().
_libc = ctypes.CDLL("libc.so.6",use_errno=True)
_lgetxattr = _libc.lgetxattr
# Argument types in call order: path, attribute name, value buffer, buffer size.
_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
def lgetxattr(path,name):
    """Read extended attribute `name` of `path` and return it as text.

    str arguments are encoded with errors='backslashreplace'. The value
    is decoded with errors='backslashreplace' as well. Returns None when
    the call fails with ENODATA and raises IOError for any other failure
    except ERANGE, which is handled by retrying with a doubled buffer.
    """
    if type(path) == str:
        path = path.encode(errors='backslashreplace')
    if type(name) == str:
        name = name.encode(errors='backslashreplace')

    capacity = 64
    while True:
        buf = ctypes.create_string_buffer(capacity)
        got = _lgetxattr(path,name,buf,ctypes.c_size_t(capacity))
        if got < 0:
            err = ctypes.get_errno()
            if err == errno.ERANGE:
                # Buffer too small: try again with twice the space.
                capacity *= 2
                continue
            if err == errno.ENODATA:
                return None
            raise IOError(err,os.strerror(err),path)
        return buf.raw[0:got].decode(errors='backslashreplace')
def ismergerfs(path):
    """Return True when `path` carries the 'user.mergerfs.basepath' xattr.

    lgetxattr() reports a missing attribute (ENODATA) by returning None
    instead of raising, so the result is checked explicitly; otherwise
    any path on an xattr-capable filesystem would be accepted. Any
    IOError from the lookup is treated as "not mergerfs".
    """
    try:
        return lgetxattr(path,'user.mergerfs.basepath') is not None
    except IOError:
        return False
def mergerfs_control_file(basedir):
    """Find the '.mergerfs' control file in `basedir` or one of its parents.

    Walks upwards one directory at a time and returns the first
    existing '<dir>/.mergerfs'. Returns None when '/' is reached (the
    root itself is not probed) or when the path cannot be shortened any
    further, which happens for relative or empty paths and previously
    recursed without end.
    """
    while basedir != '/':
        ctrlfile = os.path.join(basedir,'.mergerfs')
        if os.path.exists(ctrlfile):
            return ctrlfile
        parent = os.path.dirname(basedir)
        if parent == basedir:
            # dirname() made no progress (e.g. ''), so stop searching.
            return None
        basedir = parent
    return None
def mergerfs_branches(ctrlfile):
    """Return the branch paths listed in the control file's
    'user.mergerfs.srcmounts' xattr, split on ':'."""
    srcmounts = lgetxattr(ctrlfile,'user.mergerfs.srcmounts')
    return srcmounts.split(':')
def match(filename,matches):
    """Return True when `filename` matches any fnmatch pattern in `matches`."""
    hits = (fnmatch.fnmatch(filename,pattern) for pattern in matches)
    return True if any(hits) else False
def execute_cmd(args):
    """Run the command `args` with subprocess.call and return its exit status."""
    status = subprocess.call(args)
    return status
def print_args(args):
    """Print `args` as one shell-quoted command line."""
    print(' '.join(map(shlex.quote,args)))
def build_copy_file(src,tgt,rel):
    """Return the rsync argv that copies `rel` from branch `src` to `tgt`.

    The source is written as '<src>/./<rel>' and passed together with
    --relative; the target is the branch root with a trailing slash.
    """
    command = ['rsync',
               '-avHAXWE',
               '--numeric-ids',
               '--progress',
               '--relative']
    command.append(os.path.join(src,'./',rel))
    command.append(tgt + '/')
    return command
def build_branches_freespace(branches):
    """Map each branch path to f_bavail * f_frsize from os.statvfs()."""
    def available(branch):
        vfs = os.statvfs(branch)
        return vfs.f_bavail * vfs.f_frsize

    return {branch: available(branch) for branch in branches}
def print_help():
    """Print the hand-written usage text for mergerfs.dup."""
    usage_text = '''
usage: mergerfs.dup [<options>] <dir>
Duplicate files & directories across multiple drives in a pool.
Will print out commands for inspection and out of band use.
positional arguments:
dir starting directory
optional arguments:
-c, --count= Number of copies to create. (default: 2)
-d, --dup= Which file (if more than one exists) to choose to
duplicate. Each one falls back to `mergerfs` if
all files have the same value. (default: newest)
* newest : file with largest mtime
* oldest : file with smallest mtime
* smallest : file with smallest size
* largest : file with largest size
* mergerfs : file chosen by mergerfs' getattr
-p, --prune Remove files above `count`. Without this enabled
it will update all existing files.
-e, --execute Execute `rsync` and `rm` commands. Not just
print them.
-I, --include= fnmatch compatible filter to include files.
Can be used multiple times.
-E, --exclude= fnmatch compatible filter to exclude files.
Can be used multiple times.
'''
    print(usage_text)
def buildargparser():
    """Build the argparse parser for mergerfs.dup.

    Built-in help is disabled; -h/--help is a plain flag so the caller
    can print its own usage text.
    """
    parser = argparse.ArgumentParser(add_help=False)
    add = parser.add_argument

    add('dir',type=str,nargs='?',default=None)
    add('-c','--count',dest='count',type=int,default=2)
    add('-p','--prune',dest='prune',action='store_true')
    add('-d','--dup',
        choices=['newest','oldest',
                 'smallest','largest',
                 'mergerfs'],
        default='newest')
    add('-e','--execute',dest='execute',action='store_true')
    add('-I','--include',dest='include',type=str,action='append',default=[])
    add('-E','--exclude',dest='exclude',type=str,action='append',default=[])
    add('-h','--help',action='store_true')

    return parser
def xattr_basepath(fullpath):
    """Return the 'user.mergerfs.basepath' xattr of `fullpath`."""
    attr_name = 'user.mergerfs.basepath'
    return lgetxattr(fullpath,attr_name)
def xattr_allpaths(fullpath):
    """Return the 'user.mergerfs.allpaths' xattr of `fullpath`."""
    attr_name = 'user.mergerfs.allpaths'
    return lgetxattr(fullpath,attr_name)
def xattr_relpath(fullpath):
    """Return the 'user.mergerfs.relpath' xattr of `fullpath`."""
    attr_name = 'user.mergerfs.relpath'
    return lgetxattr(fullpath,attr_name)
def exists(base,rel,name):
    """Return True when '<base>/<rel>/<name>' exists.

    Uses os.path.lexists, so a broken symlink counts as existing.
    """
    return os.path.lexists(os.path.join(base,rel,name))
def mergerfs_all_basepaths(fullpath,relpath):
    """Return the branch roots that hold a copy of `fullpath`.

    The copies come from the file's 'allpaths' xattr. When that is
    empty, the parent directory's xattr is used instead and only the
    branches where the file name exists are kept. The trailing
    `relpath` is cut from each copy's path to obtain the branch root.
    """
    allpaths = xattr_allpaths(fullpath)
    if allpaths:
        copies = allpaths.split('\0')
    else:
        (parent,leaf) = os.path.split(fullpath)
        copies = []
        for branchdir in xattr_allpaths(parent).split('\0'):
            candidate = os.path.join(branchdir,leaf)
            if os.path.lexists(candidate):
                copies.append(candidate)

    cut = len(relpath)
    return [copy[:-cut].rstrip('/') for copy in copies]
def mergerfs_basepath(fullpath):
    """Return the branch root reported for `fullpath`.

    Normally this is the file's 'basepath' xattr. When that is empty,
    the parent directory's branches are searched for the file name and
    the first branch holding it is returned, with the parent's relpath
    cut off.
    """
    base = xattr_basepath(fullpath)
    if base:
        return base

    (parent,leaf) = os.path.split(fullpath)
    branchdirs = xattr_allpaths(parent).split('\0')
    for branchdir in branchdirs:
        if os.path.lexists(os.path.join(branchdir,leaf)):
            parent_rel = xattr_relpath(parent)
            return branchdir[:-len(parent_rel)].rstrip('/')

    # NOTE(review): when no branch holds the file, the list of branch
    # directories is returned rather than a single path — confirm
    # callers can cope with that.
    return branchdirs
def mergerfs_relpath(fullpath):
    """Return the pool-relative path of `fullpath` without a leading '/'.

    Uses the file's 'relpath' xattr, or when that is empty the parent
    directory's relpath joined with the file name.
    """
    rel = xattr_relpath(fullpath)
    if not rel:
        (parent,leaf) = os.path.split(fullpath)
        rel = os.path.join(xattr_relpath(parent),leaf)
    return rel.lstrip('/')
def newest_dupfun(default_basepath,relpath,basepaths):
    """Pick the branch whose copy of `relpath` is newest.

    Copies are compared by mtime, then by ctime if all mtimes are
    equal. `default_basepath` is returned when both are equal on every
    branch.
    """
    sts = {base: os.lstat(os.path.join(base,relpath)) for base in basepaths}
    reference = sts[basepaths[0]]

    for field in ('st_mtime','st_ctime'):
        wanted = getattr(reference,field)
        if any(getattr(st,field) != wanted for st in sts.values()):
            # max() returns the first branch holding the largest value.
            return max(sts,key=lambda base: getattr(sts[base],field))

    return default_basepath
def oldest_dupfun(default_basepath,relpath,basepaths):
    """Pick the branch whose copy of `relpath` is oldest.

    Copies are compared by mtime, then by ctime if all mtimes are
    equal. `default_basepath` is returned when both are equal on every
    branch.
    """
    sts = {base: os.lstat(os.path.join(base,relpath)) for base in basepaths}
    reference = sts[basepaths[0]]

    for field in ('st_mtime','st_ctime'):
        wanted = getattr(reference,field)
        if any(getattr(st,field) != wanted for st in sts.values()):
            # min() returns the first branch holding the smallest value.
            return min(sts,key=lambda base: getattr(sts[base],field))

    return default_basepath
def largest_dupfun(default_basepath,relpath,basepaths):
    """Pick the branch whose copy of `relpath` is largest.

    `default_basepath` is returned when every copy has the same size.
    """
    sts = {base: os.lstat(os.path.join(base,relpath)) for base in basepaths}
    wanted = sts[basepaths[0]].st_size

    if any(st.st_size != wanted for st in sts.values()):
        # max() returns the first branch holding the largest size.
        return max(sts,key=lambda base: sts[base].st_size)
    return default_basepath
def smallest_dupfun(default_basepath,relpath,basepaths):
    """Pick the branch whose copy of `relpath` is smallest.

    `default_basepath` is returned when every copy has the same size.
    """
    sts = {base: os.lstat(os.path.join(base,relpath)) for base in basepaths}
    wanted = sts[basepaths[0]].st_size

    if any(st.st_size != wanted for st in sts.values()):
        # min() returns the first branch holding the smallest size.
        return min(sts,key=lambda base: sts[base].st_size)
    return default_basepath
def mergerfs_dupfun(default_basepath,relpath,basepaths):
    """Always choose `default_basepath`; the other arguments are ignored."""
    chosen = default_basepath
    return chosen
def getdupfun(name):
    """Return the source-selection function for --dup policy `name`.

    Raises KeyError for an unknown name.
    """
    funs = dict(newest=newest_dupfun,
                oldest=oldest_dupfun,
                smallest=smallest_dupfun,
                largest=largest_dupfun,
                mergerfs=mergerfs_dupfun)
    return funs[name]
# Entry point of mergerfs.dup: walk the given mergerfs directory and, for
# every file passing the include/exclude filters, print (and with
# --execute run) the rsync/rm commands needed to end up with `count`
# copies spread over the pool's branches.
def main():
# Re-wrap stdout/stderr so unencodable file names are printed with
# backslash escapes.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
encoding='utf8',
errors='backslashreplace',
line_buffering=True)
sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
encoding='utf8',
errors='backslashreplace',
line_buffering=True)
parser = buildargparser()
args = parser.parse_args()
if args.help or not args.dir:
print_help()
sys.exit(0)
args.dir = os.path.realpath(args.dir)
if not ismergerfs(args.dir):
print("%s is not a mergerfs mount" % args.dir)
sys.exit(1)
prune = args.prune
execute = args.execute
includes = ['*'] if not args.include else args.include
excludes = args.exclude
dupfun = getdupfun(args.dup)
# NOTE(review): mergerfs_control_file() can return None, which is then
# passed on unchecked — confirm this cannot happen for a mergerfs dir.
ctrlfile = mergerfs_control_file(args.dir)
branches = mergerfs_branches(ctrlfile)
# From here on `branches` maps branch path -> free-space figure; the
# figure is adjusted below as copies are planned or pruned.
branches = build_branches_freespace(branches)
# There cannot be more copies than there are branches.
count = min(args.count,len(branches))
try:
for (dirpath,dirnames,filenames) in os.walk(args.dir):
for filename in filenames:
if match(filename,excludes):
continue
if not match(filename,includes):
continue
fullpath = os.path.join(dirpath,filename)
basepath = mergerfs_basepath(fullpath)
relpath = mergerfs_relpath(fullpath)
existing = mergerfs_all_basepaths(fullpath,relpath)
# Choose the branch holding the copy that serves as rsync source.
srcpath = dupfun(basepath,relpath,existing)
srcfile = os.path.join(srcpath,relpath)
srcfile_size = os.lstat(srcfile).st_size
# The remaining entries of `existing` are the other current copies.
existing.remove(srcpath)
# i counts copies accounted for so far; the source is the first.
i = 1
copies = []
# Refresh the other existing copies from the source. With --prune
# this stops once `count` copies are accounted for.
for tgtpath in existing:
if prune and i >= count:
break
copies.append(tgtpath)
# Note: from here on `args` holds a command list, not the parsed options.
args = build_copy_file(srcpath,tgtpath,relpath)
print('# overwrite')
print_args(args)
if execute:
execute_cmd(args)
i += 1
# Create the copies still missing, one per pass, on the branch with
# the largest free-space figure that does not hold the file yet.
for _ in range(i,count):
for branch in sorted(branches,key=branches.get,reverse=True):
tgtfile = os.path.join(branch,relpath)
if branch in copies or os.path.exists(tgtfile):
continue
copies.append(branch)
# Account for the space the new copy will take on that branch.
branches[branch] -= srcfile_size
args = build_copy_file(srcpath,branch,relpath)
print('# copy')
print_args(args)
if execute:
execute_cmd(args)
break
# With --prune, delete the existing copies that were not refreshed above.
if prune:
leftovers = set(existing) - set(copies)
for branch in leftovers:
# Credit back the space the removed copy frees.
branches[branch] += srcfile_size
tgtfile = os.path.join(branch,relpath)
print('# remove')
args = ['rm','-vf',tgtfile]
print_args(args)
if execute:
execute_cmd(args)
except KeyboardInterrupt:
print("exiting: CTRL-C pressed")
# A closed output pipe (e.g. piping into `head`) ends the run quietly.
except BrokenPipeError:
pass
sys.exit(0)
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()
0707010000000A000081ED00000000000000000000000164FFAFB7000019BD000000000000000000000000000000000000004100000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/src/mergerfs.fsck#!/usr/bin/env python3
# Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import argparse
import ctypes
import errno
import io
import os
import sys
# Bind libc's lgetxattr through ctypes. use_errno=True lets callers read
# the failure reason with ctypes.get_errno().
_libc = ctypes.CDLL("libc.so.6",use_errno=True)
_lgetxattr = _libc.lgetxattr
# Argument types in call order: path, attribute name, value buffer, buffer size.
_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t]
def lgetxattr(path,name):
    """Read extended attribute `name` of `path` via libc's lgetxattr.

    str arguments are encoded with errors='backslashreplace'. Returns
    the raw value as bytes, or None when the call fails with ENODATA.
    Raises IOError for any other failure except ERANGE, which is
    handled by retrying with a doubled buffer.
    """
    if type(path) == str:
        path = path.encode(errors='backslashreplace')
    if type(name) == str:
        name = name.encode(errors='backslashreplace')

    bufsize = 64
    while True:
        buf = ctypes.create_string_buffer(bufsize)
        nbytes = _lgetxattr(path,name,buf,ctypes.c_size_t(bufsize))
        if nbytes >= 0:
            return buf.raw[0:nbytes]

        failure = ctypes.get_errno()
        if failure == errno.ENODATA:
            return None
        if failure != errno.ERANGE:
            raise IOError(failure,os.strerror(failure),path)
        # ERANGE: buffer too small, try again with twice the space.
        bufsize *= 2
def ismergerfs(path):
    """Return True when `path` carries the 'user.mergerfs.fullpath' xattr.

    lgetxattr() reports a missing attribute (ENODATA) by returning None
    instead of raising, so the result is checked explicitly; otherwise
    any path on an xattr-capable filesystem would be accepted. Any
    IOError from the lookup is treated as "not mergerfs".
    """
    try:
        return lgetxattr(path,"user.mergerfs.fullpath") is not None
    except IOError:
        return False
def setstat(stat,paths):
    """Apply the mode, uid and gid of `stat` to every path in `paths`.

    Each successful change is reported on stdout; a failure is printed
    and the remaining paths are still processed.
    """
    for target in paths:
        try:
            os.chmod(target,stat.st_mode)
            os.chown(target,stat.st_uid,stat.st_gid)
            report = (target,stat.st_uid,stat.st_gid,stat.st_mode)
            print("set %s > uid: %d gid: %d mode: %o" % report)
        except Exception as err:
            print("%s" % err)
def stats_different(stats):
    """Return True when any entry's mode, uid or gid differs from the first."""
    def ownership(st):
        return (st.st_mode,st.st_uid,st.st_gid)

    reference = ownership(stats[0])
    return any(ownership(st) != reference for st in stats)
def size_equal(stats):
    """Return True when every entry has the same st_size as the first."""
    reference = stats[0].st_size
    return all(st.st_size == reference for st in stats)
def print_stats(Files,Stats):
    """Print a numbered two-line summary for each file and its stat.

    `Files` holds bytes paths; numbering starts at 0 to match the index
    manual_fix() asks for.
    """
    template = (" - uid: {0:5}; gid: {1:5}; mode: {2:6o}; "
                "size: {3:10}; mtime: {4}")
    for (index,raw_path) in enumerate(Files):
        st = Stats[index]
        print(" %i: %s" % (index,raw_path.decode(errors='backslashreplace')))
        print(template.format(st.st_uid,
                              st.st_gid,
                              st.st_mode,
                              st.st_size,
                              st.st_mtime))
def noop_fix(paths,stats):
    """Fix policy that changes nothing."""
    return None
def manual_fix(paths,stats):
    """Ask which copy is correct and apply its stat to all paths.

    An out-of-range index prompts again. Any exception, including
    non-numeric input, is printed and ends the prompt without changes.
    """
    while True:
        try:
            choice = int(input('Which is correct?: '))
            if choice < 0 or choice >= len(paths):
                print("Input error: enter a value [0,%d]" % (len(paths)-1))
                continue
            setstat(stats[choice],paths)
        except Exception as err:
            print("%s" % err)
        return
def newest_fix(paths,stats):
    """Apply the stat of the copy with the largest mtime to all paths.

    `stats` is sorted in place by mtime. Errors are printed.
    """
    stats.sort(key=lambda st: st.st_mtime)
    try:
        latest = stats[-1]
        setstat(latest,paths)
    except Exception as err:
        print("%s" % err)
def nonroot_fix(paths,stats):
    """Apply the stat of the first copy not owned by uid 0 to all paths.

    Falls back to newest_fix() when every copy is owned by uid 0.
    Errors are printed.
    """
    try:
        candidates = (st for st in stats if st.st_uid != 0)
        chosen = next(candidates,None)
        if chosen is None:
            return newest_fix(paths,stats)
        setstat(chosen,paths)
        return
    except Exception as err:
        print("%s" % err)
def getfixfun(name):
    """Return the fix function for --fix policy `name`.

    Any name other than 'manual', 'newest' or 'nonroot' (including
    None) selects noop_fix.
    """
    if name == 'nonroot':
        return nonroot_fix
    if name == 'newest':
        return newest_fix
    if name == 'manual':
        return manual_fix
    return noop_fix
def check_consistancy(fullpath,verbose,size,fix):
    """Report `fullpath` when its copies differ in mode, uid or gid.

    The copies come from the 'user.mergerfs.allpaths' xattr. With
    `size` set, entries whose copies differ in size are not reported.
    A reported path is printed (with details when `verbose`) and then
    handed to `fix`.
    """
    raw = lgetxattr(fullpath,"user.mergerfs.allpaths")
    if not raw:
        return

    paths = raw.split(b'\0')
    if len(paths) <= 1:
        return

    stats = list(map(os.stat,paths))
    skip = (size and not size_equal(stats)) or not stats_different(stats)
    if skip:
        return

    print("%s" % fullpath)
    if verbose:
        print_stats(paths,stats)
    fix(paths,stats)
def buildargparser():
    """Build the argparse parser for mergerfs.fsck."""
    parser = argparse.ArgumentParser(
        description='audit a mergerfs mount for inconsistencies')
    add = parser.add_argument

    add('dir',type=str,help='starting directory')
    add('-v','--verbose',action='store_true',help='print details of audit item')
    add('-s','--size',action='store_true',help='only consider if the size is the same')
    add('-f','--fix',choices=['manual','newest','nonroot'],help='fix policy')

    return parser
def main():
    """Entry point of mergerfs.fsck.

    Walks the given mergerfs directory and checks every directory and
    file with check_consistancy(). Choosing a --fix policy turns on
    verbose output.
    """
    # Re-wrap stdout/stderr so unencodable file names are printed with
    # backslash escapes.
    for stream_name in ('stdout','stderr'):
        current = getattr(sys,stream_name)
        setattr(sys,stream_name,io.TextIOWrapper(current.buffer,
                                                 encoding='utf8',
                                                 errors='backslashreplace',
                                                 line_buffering=True))

    args = buildargparser().parse_args()

    if args.fix:
        args.verbose = True
    fix = getfixfun(args.fix)

    args.dir = os.path.realpath(args.dir)
    if not ismergerfs(args.dir):
        print("%s is not a mergerfs directory" % args.dir)
        sys.exit(1)

    try:
        size = args.size
        verbose = args.verbose
        for (dirname,dirnames,filenames) in os.walk(args.dir):
            fulldirpath = os.path.join(args.dir,dirname)
            check_consistancy(fulldirpath,verbose,size,fix)
            for filename in filenames:
                check_consistancy(os.path.join(fulldirpath,filename),
                                  verbose,size,fix)
    except KeyboardInterrupt:
        pass
    except IOError as err:
        if err.errno != errno.EPIPE:
            raise

    sys.exit(0)
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()
0707010000000B000081ED00000000000000000000000164FFAFB700000219000000000000000000000000000000000000004400000000mergerfs-tools-0.0+git.20230911T192423~80d6c95/src/mergerfs.mktrash#!/bin/bash
# Create a world-writable, sticky ".Trash" directory on every branch of
# the mergerfs pool mounted at <mountpoint>. Must be run as root.
TRASHDIR=".Trash"
MOUNTPOINT="${1}"

if [ "${MOUNTPOINT}" = "" ]; then
    echo "usage: ${0} <mountpoint>"
    exit 1
fi

if [ $EUID -ne 0 ]; then
    echo "You must run ${0} as root"
    exit 2
fi

# The control file only exists at the root of a mergerfs mount.
if [ ! -e "${MOUNTPOINT}/.mergerfs" ]; then
    echo "ERROR: ${MOUNTPOINT} does not appear to be a mergerfs mountpoint"
    exit 3
fi

# srcmounts is a ':'-separated list. Split on ':' only and keep the
# entries in an array, so branch paths containing whitespace or glob
# characters are neither word-split nor expanded.
IFS=':' read -r -a SRCMOUNTS < <(xattr -p user.mergerfs.srcmounts "${MOUNTPOINT}/.mergerfs")

for mount in "${SRCMOUNTS[@]}"
do
    DIR="${mount}/${TRASHDIR}"
    mkdir -v --mode=1777 "${DIR}"
done

exit 0
07070100000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000B00000000TRAILER!!!156 blocks