Logo openSUSE Build Service > Projects
Sign Up | Log In

View File torrent-crawler.rb of Package torrent-seeder (Project openSUSE:infrastructure:devel)

#!/usr/bin/ruby
# vim: set sw=4 sts=4 et tw=80 :

require 'rubygems'
require 'bencode'
require 'rubytorrent'
require 'digest/sha1'
require 'digest/md5'
require 'find'
require 'pathname'
require 'pp'
require 'fileutils'
# When true, Purger and TorrentLinker print extra diagnostics.
DEBUG = false

# Directory that receives the symlinks to torrents and their payload files.
SEEDER_DIRECTORY = '/srv/torrents'
# Root below which the distribution trees are searched.
DISTRO_PREFIX = '/srv'
# Root of the third-party images; one subdirectory level is searched.
THIRDPARTY_DIR = '/srv/3rdparty/'
dvd5s_subdir = File.join(DISTRO_PREFIX, 'opensuse-dvd5s', '**')

# One recursive glob prefix per distribution subdirectory ('1*' matches the
# release directories whose name starts with "1").
distro_subdirs = %w{iso delta}.map do |subdir|
  File.join(DISTRO_PREFIX, 'pub', 'opensuse', 'distribution', '1*', subdir, '**')
end

# Glob patterns locating .torrent files. The order of the patterns is kept
# stable because later matches replace earlier ones with the same basename.
torrent_pattern = distro_subdirs.map { |dir| File.join(dir, '*.torrent') }
torrent_pattern << File.join(THIRDPARTY_DIR, '*', '*.torrent')

# Glob patterns locating the payload files the torrents refer to.
iso_pattern = [File.join(dvd5s_subdir, '*.iso')]
iso_pattern.concat(distro_subdirs.map { |dir| File.join(dir, '*.iso') })
['*.iso', '*.xdelta', '*.raw.gz'].each do |wildcard|
  iso_pattern << File.join(THIRDPARTY_DIR, '*', wildcard)
end
# %w{iso delta}.each do |subdir|
#   distro_subdir = File.join(DISTRO_PREFIX,'stage','11.1',subdir,'**')
#   torrent_pattern << File.join(distro_subdir,'*.torrent')
#   iso_pattern     << File.join(distro_subdir,'*.iso')
# end
#
# Raised by TorrentLinker#symlinkiso when a source file's size differs from
# the size recorded in the torrent. Derives from StandardError (not Exception)
# so that a plain `rescue` can catch it.
class IncompleteISO < StandardError
end

# Raised by TorrentLinker#symlinkiso when the link target already exists and
# refers to different content than the new source. Derives from StandardError
# (not Exception) so that a plain `rescue` can catch it.
class SymlinkCollision < StandardError
end

# Raised by TorrentLinker#symlinkiso when no source file is known for a
# torrent entry or the source file does not exist. Derives from StandardError
# (not Exception) so that a plain `rescue` can catch it.
class MissingISO < StandardError
end

# Removes stale entries from a directory tree.
#
# The tree is scanned once at construction time. Callers then mark the entries
# they still need with #keep!, and #cleanup! unlinks everything that was
# scanned but never marked.
class Purger
  # dirname - root of the tree to scan. The root itself is never a removal
  #           candidate. Raises whatever Find.find raises when the root
  #           cannot be traversed.
  def initialize(dirname)
    @files = []
    Find.find(dirname) do |path|
      @files << path unless path == dirname
    end
    # Find yields a directory before its contents; reversing the list makes
    # cleanup! visit the contents first so emptied directories can be removed.
    @files.reverse!
    puts "Purger found #{@files.size} items" if DEBUG
  end

  # Protects +pathname+ from cleanup!.
  #
  # Nothing happens when the path does not exist. File.exist? follows
  # symlinks, so a dangling symlink is never protected. The path has to be
  # equal (==) to the String recorded during the scan to have any effect.
  #
  # Returns the removed list entry, or nil when nothing was protected.
  def keep!(pathname)
    @files.delete(pathname) if File.exist?(pathname)
  end

  # Unlinks every scanned entry that was not protected with keep!.
  #
  # Directories that still contain protected entries are left in place, and
  # entries that disappeared since the scan are skipped instead of aborting
  # the whole run.
  def cleanup!
    @files.each do |fname|
      begin
        puts "unlinking #{fname}"
        Pathname.new(fname).unlink
      rescue Errno::ENOTEMPTY, Errno::EEXIST
        # Directory still holds kept entries (rmdir may report either errno).
      rescue Errno::ENOENT
        # Already gone, e.g. removed by another process after the scan.
      end
    end
  end
end

# Fills SEEDER_DIRECTORY with symlinks to every parsable torrent and to the
# payload files it describes, and tells the purger which entries of the
# seeder directory are still wanted.
class TorrentLinker
  # purger   - object responding to keep!(path).
  # torrents - Hash of torrent basename => path of the .torrent file.
  # isos     - Hash of payload basename => path of the payload file.
  def initialize(purger, torrents, isos)
    @torrents = torrents
    @isos     = isos
    @purger   = purger
    # info hash => first torrent path seen with that hash
    @handled_torrents = {}
  end

  # Processes every known torrent. Problems with a single torrent are reported
  # on stdout and do not stop the remaining torrents from being processed.
  def run!
    @torrents.each_value { |fullpath| handle_torrent(fullpath) }
  end

  private

  # Returns the SHA1 digest object of the contents of +filename+.
  def digest(filename)
    $stderr.puts("Hashing '#{filename}'")
    sha1 = Digest::SHA1.new
    File.open(filename, 'rb') do |fh|
      while (buf = fh.read(4096))
        sha1.update(buf)
      end
    end
    sha1
  end

  # Builds the SymlinkCollision message without calling File.readlink on
  # something that is not a symlink (which would raise Errno::EINVAL).
  def collision_message(source, target)
    current = if File.symlink?(target)
                "Points to '#{File.readlink(target)}'."
              else
                'Is not a symlink.'
              end
    "Symlink target '#{target}' already exists:\n   #{current}\n   The new symlink would point to '#{source}'"
  end

  # Makes +target+ a symlink to +source+.
  #
  # source - path of the payload file, or nil when none was found.
  # target - path of the link inside the seeder directory.
  # size   - payload size in bytes according to the torrent.
  #
  # An existing target is left alone when it already links to +source+ or
  # holds identical content. A dangling symlink is replaced.
  #
  # Raises MissingISO when there is no usable source, IncompleteISO when the
  # source size does not match, and SymlinkCollision when the target exists
  # with different content.
  def symlinkiso(source, target, size)
    raise MissingISO, "no source file found for '#{File.basename(target)}'" if source.nil?
    raise MissingISO, "#{source} doesnt exist" unless File.exist?(source)
    source_size = File.size(source)
    if source_size != size
      raise IncompleteISO, "'#{source}' expected size #{size}. actual size #{source_size}"
    end

    @purger.keep! target
    is_link = File.symlink?(target)
    if is_link && !File.exist?(target)
      # Dangling link: its old destination is gone, so hashing it would fail.
      puts "replacing dangling symlink #{target}"
      File.unlink(target)
    elsif is_link || File.exist?(target)
      # Already linked to the very same file: nothing to do.
      return if is_link && File.readlink(target) == source.to_s
      # Different path but identical content is acceptable as well.
      return if File.file?(target) && digest(source) == digest(target)
      raise SymlinkCollision, collision_message(source, target)
    end

    puts "symlink #{source} => #{target}"
    File.symlink(source, target)
    # A replaced dangling link could not be protected above because keep!
    # only accepts existing paths; protect it now that it resolves again.
    @purger.keep! target
  end

  # Links every file of a multi-file torrent into a directory named after the
  # torrent.
  def link_payload_directory(tmii)
    dirname = File.join(SEEDER_DIRECTORY, tmii.name.to_s)
    @purger.keep!(dirname)
    begin
      FileUtils.mkdir(dirname)
    rescue Errno::EEXIST
      # left over from an earlier run
    end
    tmii.files.each do |iso|
      # NOTE(review): presumably iso.path is an Array of path components.
      # Array#to_s stopped joining elements in Ruby 1.9, so join explicitly;
      # this is a no-op for a plain String. Components are concatenated
      # without a separator, as the previous to_s did on Ruby 1.8 - confirm
      # against RubyTorrent whether nested paths need File.join instead.
      isofname = Array(iso.path).join
      symlinkiso(@isos[isofname], File.join(dirname, isofname), iso.length)
    end
  end

  # Links the .torrent file itself into the seeder directory. An existing
  # entry that still resolves is left untouched; a dangling symlink is
  # replaced instead of making File.symlink fail with Errno::EEXIST.
  def link_torrent(fname)
    torrent_path = File.join(SEEDER_DIRECTORY, File.basename(fname.to_s))
    @purger.keep! torrent_path
    return if File.exist?(torrent_path)

    File.unlink(torrent_path) if File.symlink?(torrent_path)
    File.symlink(fname.to_s, torrent_path)
    @purger.keep! torrent_path
  end

  # Parses the torrent at +fname+ and links its payload and the torrent file
  # into the seeder directory. A torrent whose info hash was already seen is
  # skipped, even if the earlier attempt failed. Expected failures are
  # reported on stdout and swallowed.
  def handle_torrent(fname)
    tmii = RubyTorrent::MetaInfo.from_location(fname).info
    info_hash = tmii.sha1
    if @handled_torrents.key?(info_hash)
      puts "found dupe #{File.basename(fname.to_s)} (#{info_hash.unpack('H*').first}) (#{fname} <> #{@handled_torrents[info_hash]})" if DEBUG
      return
    end
    puts "handling #{info_hash.unpack('H*').first}" if DEBUG
    @handled_torrents[info_hash] = fname

    @purger.keep!(File.join(SEEDER_DIRECTORY, "#{tmii.name}.fastresume"))
    if tmii.single?
      symlinkiso(@isos[tmii.name], File.join(SEEDER_DIRECTORY, tmii.name), tmii.length)
    else
      link_payload_directory(tmii)
    end
    link_torrent(fname)
  rescue RubyTorrent::MetaInfoFormatError, RubyTorrent::BEncodingError
    puts "Can't parse #{fname}: maybe not a .torrent file?"
  rescue Errno::EACCES
    puts "Can not read #{fname}"
  rescue MissingISO => e
    puts "Missing iso #{fname}: #{e.message}"
  rescue IncompleteISO => e
    puts "Found incomplete ISO for #{fname}: #{e.message}"
  rescue SymlinkCollision => e
    puts e
    puts "please fix this manually and call the script again!"
  end
end

# Globs +pattern+ (a single pattern or an Array of patterns) and indexes the
# matches by file name.
#
# Returns a Hash of basename (String) => Pathname. When several matches share
# a basename, the one Dir.glob returns last wins.
def getfileshash(pattern)
  pairs = Dir.glob(pattern).map do |f|
    path = Pathname.new(f)
    [path.basename.to_s, path]
  end
  # Build the hash from the pair list directly; splatting the flattened list
  # would pass every match as a separate method argument.
  Hash[pairs]
end

# Index the torrents and payload files found on disk by their basename.
torrent_files, iso_files = [torrent_pattern, iso_pattern].map do |patterns|
  getfileshash(patterns)
end
# For debugging: pp torrent_files / pp iso_files

# Scan the seeder directory first, then (re)create the links, and finally
# remove whatever was scanned but is no longer referenced by any torrent.
purger = Purger.new(SEEDER_DIRECTORY)
TorrentLinker.new(purger, torrent_files, iso_files).run!
purger.cleanup!