File clean-up-sources.sh of Package openclipart

#!/bin/sh

# Print the purpose of the script and its calling convention to stdout.
# The script name shown is the basename of $0.
usage()
{
    cat <<EOF
This script cleans up the png sources.

It removes some  broken or unwanted pictures, symlinks duplicates,
and moves some pictures to a better location

Usage: ${0##*/} PNG_HOME
EOF
}

# Validate the command line: print the usage and stop with status 1 when
# help is requested, when no directory is given, or when the given
# directory does not exist below $DESTDIR.
# The three conditions are separate "test" invocations joined with ||
# because the -o operator is obsolescent and the result of "test" with
# that many arguments is unspecified by POSIX.
if test "$1" = "--help" || test -z "$1" || ! test -d "${DESTDIR}$1" ; then
    usage
    exit 1
fi

# Root of the picture tree; the files are accessed as ${DESTDIR}${PNG_HOME}.
PNG_HOME="$1"

# Move a directory (or file) to a new place.
#
# Arguments: $1 - target (destination handed to mv)
#            $2 - source path to move
# Terminates the whole script with status 1 when mv fails.
move_dir()
{
    target="$1"
    source="$2"

    if ! mv -v "$source" "$target" ; then
        exit 1
    fi
}

# Move png pictures together with their svg sources into a directory.
#
# Arguments: $1    - target directory
#            $2... - png files to move; for every NAME.png the file
#                    NAME.svg is moved as well
# Terminates the whole script with status 1 when one of the mv calls
# fails (this includes a missing svg file).
move_pictures()
{
    target="$1"; shift

    mv -v -- "$@" "$target" || exit 1

    # Move also the corresponding svg files.  The positional parameters
    # are rewritten one by one (the "for" list is expanded before the
    # first iteration, so changing "$@" inside the loop is safe).  Only
    # the ".png" suffix is replaced and every name stays a single word,
    # even when it contains whitespace or glob characters.
    for png in "$@" ; do
        shift
        set -- "$@" "${png%.png}.svg"
    done
    mv -v -- "$@" "$target" || exit 1
}

# Remove a png picture together with its svg source.
#
# Arguments: $1 - path of the png file; the svg file is expected next to
#                 it under the same name with the ".svg" suffix
# Terminates the whole script with status 1 when one of the files cannot
# be removed (this includes a missing svg file).
remove_picture()
{
    file="$1"

    rm -v -- "$file" || exit 1
    # Remove also the corresponding svg file.  Only the ".png" suffix is
    # replaced, and the name is kept as a single word so that whitespace
    # or a ".png" inside a directory name cannot produce a wrong path.
    rm -v -- "${file%.png}.svg" || exit 1
}

# Replace a duplicate picture (png and svg) with symlinks to the copy
# that is kept.
#
# Arguments: $1 - directory of the kept copy; a leading $DESTDIR is
#                 stripped from it to form the symlink target
#            $2 - directory holding the duplicate that gets replaced
#            $3 - file name of the png picture; NAME.svg is handled too
# Terminates the whole script with status 1 when rm or ln fails.
link_picure()
{
    # The arguments are used through the positional parameters and the
    # loop has its own variable: shell variables are global, and callers
    # iterate with variables named "filename", "candidate" and
    # "prim_path" which must not be changed behind their back.
    for link_name in "$3" "${3%.png}.svg" ; do
        rm -v -- "$2/$link_name" || exit 1
        ln -svf -- "${1#"$DESTDIR"}/$link_name" "$2/$link_name" || exit 1
    done
}

# Whitespace-separated list of paths relative to ${DESTDIR}${PNG_HOME}.
# The "Removing unwanted png files" loop deletes every entry recursively
# together with the matching .svg path.  Entries may be shell glob
# patterns or whole directories.
# NOTE(review): judging by the variable name these are presumably
# pictures whose png rendering is broken - confirm.
pic_broken_png="
    animals/birds/acquila_architetto_franc_04.png
    animals/birds/penguin/tux_didier_fabert_01.png
    animals/birds/duck_yellow_ii_kurt_cagl_.png
    computer/icons/button2_fudriot_omic.ch_01.png
    computer/icons/gnome-mime-application-x-tar.png
    computer/icons/etiquette-theme/gnome-mime-application-x-tar.png
    computer/icons/flat-theme/action/pen_style_nopen.png
    computer/tux_didier_fabert_01.png
    electronics/filmphoto1_fudriot_omic._01.png
    electronics/filmphoto2_fudriot_omic._01.png
    electronics/bulb/light_bulb_karl_bartel_01.png
    geography/tasmania-*.png
    office/manuscriot_fudriot_omic._01.png
    office/telephone/numero_verde_angelo_gelm_01.png
    people/stylized_yoga_person*.png
    plants/flowers/fiore_01.png
    plants/flowers/fiore_02.png
    recreation/holiday/christmas_light_jasper_v_01.png
    recreation/party/christmas_light_jasper_v_01.png
    recreation/sports/stylized_yoga_person*.png
    recreation/sports/surfboard_jurgen_01.png
    recreation/sports/badminton_racket_brice_b_01.png
    science/astronomy/moon_jasper_van_de_grond_01.png
    shapes/arrows/arrowright_fudriot_omic._01.png
    signs_and_symbols/aids_ribbon_saint_.png
    signs_and_symbols/hosta_0_symbol_1m_bwh.png
    signs_and_symbols/recycle_water_saint_.png
    signs_and_symbols/sakset2.png
    signs_and_symbols/teschio_fabio_olivo_01.png
    signs_and_symbols/teschio_fabio_olivo_02.png
    special/gradients
    transportation/redcar_marcelo_caiafa_.png
    transportation/vehicles/redcar_marcelo_caiafa_.png
    "

# Whitespace-separated list of paths (glob patterns allowed) relative to
# ${DESTDIR}${PNG_HOME}; every entry and the matching .svg path is
# deleted by the "Removing unwanted png files" loop.
# NOTE(review): the name suggests pictures that are hard to read when
# rendered as png - confirm.
pic_less_readable_png="
    buildings/homes/house_fudriot_omic.ch_01.png
    buildings/homes/houses_jon_phillips_01.png
    computer/icons/button1_fudriot_omic*.png
    computer/icons/etiquette-theme/mimetypes/gnome-mime-application-x-tar.png
    shapes/stars/star_*.png
    "

# Whitespace-separated list of paths (glob patterns allowed) relative to
# ${DESTDIR}${PNG_HOME}; every entry and the matching .svg path is
# deleted by the "Removing unwanted png files" loop.
# NOTE(review): the name suggests pictures of little practical use
# (templates, samples, collections) - confirm.
pic_less_usable_png="
    computer/icons/collection_of_passport__01.png
    shapes/callouts/speech_bubbles.png
    shapes/collection_of_passport__01.png
    signs_and_symbols/font_samples_revdoug_01.png
    signs_and_symbols/mpcad_box*.png
    signs_and_symbols/map_symbols/mapa_stawisk_guziec_.png
    special/cd_jacket_template_james_01.png
    special/collection_of_passport__01.png
    special/poster-example_01.png
    special/patterns/pattern-dots-square-grid*.png
    tools/weapons/bullet_hole_linda_kim*.png
    "

# Whitespace-separated list of files and directories relative to
# ${DESTDIR}${PNG_HOME}; every entry and the matching .svg path is
# deleted by the "Removing unwanted png files" loop.
# NOTE(review): the name suggests content considered controversial; the
# reason for the individual entries is not recorded here - confirm.
pic_controversal="
    geography/world_map_01.png
    geography/world_map_saint_.png
    signs_and_symbols/flags
    "

# Whitespace-separated list of paths relative to ${DESTDIR}${PNG_HOME};
# every entry and the matching .svg path is deleted by the "Removing
# unwanted png files" loop.  Entries may contain shell glob patterns
# such as "?" and "[2-4]".
# NOTE(review): the name suggests these are known copies of pictures
# that are kept under another name or location - confirm.
pic_double="
    animals/bugs/flying_wasp_gerald_g._02.png
    animals/mammals/bears/sleeping_bear_under_sta_03.png
    animals/mammals/dog_01_drawn_with_strai_03.png
    animals/mammals/dog_03_drawn_with_strai_02.png
    animals/mammals/dog_04_drawn_with_strai_02.png
    animals/mammals/dog_05_drawn_with_strai_02.png
    animals/mammals/dog_06_drawn_with_strai_02.png
    animals/mammals/dog_on_leash_gerald_g._02.png
    animals/mammals/horses/horse_1_konstantin_r._01.png
    computer/icons/battery_snuatautisticido_0[2-4].png
    education/scissors_03.png
    electronics/battery/battery_snuatautisticido_02.png
    electronics/battery/battery_snuatautisticido_03.png
    electronics/battery/battery_snuatautisticido_04.png
    food/beverages/flower-pot_02.png
    food/beverages/mug_toh_yen_cheng_02.png
    food/beverages/alcohol/martini_glass_k_yager_02.png
    office/scissors_02.png
    office/scissors_03.png
    people/elfish_girl_mo_.png
    people/jazz_enrique_meza_c_02.png
    people/man_head_mikhail_a.medve_01.png
    people/my_lovely_baby_enrique_m_02.png
    people/bodypart/left_foot_print_benji_pa_02.png
    people/bodypart/right_foot_print_benji_p_02.png
    plants/flowers/red_flower_03.png
    plants/trees/evergreen/christmas_tree_02.png
    recreation/music/fifties_jukebox_gerald_g_02.png
    recreation/music/jazz_enrique_meza_c_02.png
    recreation/music/war_drum_enrique_meza_c_02.png
    recreation/religion/creation_day_?_number_ge_0[2-4].png
    recreation/religion/discordian_sacred_chao_t_01.png
    recreation/sports/cronometro_mauro_olivo_0[2-3].png
    recreation/sports/table_tennis_racquets_o_01r.png
    signs_and_symbols/elder_sign_nubldoff_0[2-3]r.png
    signs_and_symbols/elder_sign_nurbldoff_0[1-6]r.png
    tools/ironing_board_gerald_g._02.png
    tools/scissors_03.png
    tools/vavuum_cleaner_-_uprigh_02.png
    tools/weapons/kallisti-grenade_2_nurbl_01.png
    "

# Whitespace-separated list of files and directories (glob patterns
# allowed) relative to ${DESTDIR}${PNG_HOME}; every entry and the
# matching .svg path is deleted by the "Removing unwanted png files"
# loop.
# NOTE(review): the name suggests mis-sorted or otherwise messy content
# that is dropped instead of being moved - confirm.
pic_mess="
    buttons
    geography/eiffel_tower_michael_ja_r.png
    geography/shi_ken_01.png
    geography/symbol_for_a_church_on__01.png
    geography/war_drum_enrique_meza_c_0[1-2].png
    people/bodypart/fawn_mo_01.png
    recreation/art/airbrush_alejandro_tejad_.png
    recreation/games/cards/andalusia_01.png
    recreation/games/cards/jacob_leisler_mo_01.png
    signs_and_symbols/map_symbols/map_of_poland_guziec_.png
    shapes/airplane_nicu_buculei_01.png
    shapes/deer_matt_todd_01.png
    shapes/fancy_red_button_matt_to_01.png
    shapes/heart_jon_phillips_01.png
    shapes/left_foot_print_benji_pa_02.png
    shapes/right_foot_print_benji_p_02.png
    shapes/shield_matt_todd*.png
    shapes/wooden_mallot_benji_park_.png
    special/logaritmic_diagram_01.png
    special/examples
    special/patterns/model-view-controller_ia_01.png
    tools/compass_01.png
    unsorted
    "


echo
echo "Removing unwanted png files..."
echo

# Delete every listed path below ${DESTDIR}${PNG_HOME} together with the
# matching svg path.  The list variables and $path are intentionally
# expanded without quotes: the entries are separated by whitespace and
# may be glob patterns that the shell has to expand.
for path in $pic_broken_png \
            $pic_less_readable_png \
            $pic_less_usable_png \
            $pic_controversal \
            $pic_double \
            $pic_mess ; do
    # xargs is necessary because shapes/stars/star_*.png covers too many files
    # (printf replaces "echo -n", whose behavior differs between shells)
    printf '%s\n' ${DESTDIR}${PNG_HOME}/$path | xargs rm -rv || exit 1
    # Remove also the corresponding svg files.  The svg pattern is derived
    # with a parameter expansion, so the pattern is not globbed against the
    # current directory and only the ".png" suffix is replaced; entries
    # without that suffix (directories) are kept unchanged.
    case "$path" in
        *.png) path_svg="${path%.png}.svg" ;;
        *)     path_svg="$path" ;;
    esac
    # -f: a missing svg file is not an error
    printf '%s\n' ${DESTDIR}${PNG_HOME}/$path_svg | xargs rm -rfv || exit 1
done


echo
echo "Moving some wrong sorted png files..."
echo

# All paths below are relative to the picture tree, so the moves are done
# from inside it.  Plain "cd" is used instead of pushd/popd, which a POSIX
# /bin/sh does not provide, and the script stops when the directory cannot
# be entered: otherwise the moves would run in the wrong place.
start_dir=`pwd` || exit 1
cd "${DESTDIR}${PNG_HOME}" || exit 1

# move_pictures TARGET_DIR PNG...   (the svg sources are moved as well)
move_pictures tools                    animals/birds/macchina_fotografica_di_01.png || exit 1
move_pictures tools                    animals/lucchetto_grigio_archite_01.png || exit 1
move_pictures tools                    animals/lucchetto_ottone_archite_01.png || exit 1
move_pictures tools/weapons            animals/fish/kallisti-grenade_1_nurbl_01.png || exit 1
move_pictures signs_and_symbols        computer/icons/yang_sergio_luiz_araujo__01.png || exit 1
move_pictures tools                    geography/spyglass_vladimir_hernan_01.png || exit 1
move_pictures people                   plants/human_humain_umano_sed_r.png || exit 1
move_pictures electronics              recreation/games/cards/audio_cassette_mo_01.png || exit 1
move_pictures people                   recreation/games/cards/brown_hair_boy_face_mike_02.png || exit 1
move_pictures decorations              recreation/games/cards/celtic-vine-corner_steve_01.png || exit 1
move_pictures tools                    recreation/*scuba_tank*.png || exit 1
move_pictures electronics/bulb         shapes/cfbulb_*.png || exit 1
move_pictures signs_and_symbols/awards shapes/clipart_by_nicu_buculei*.png || exit 1
move_pictures decorations              shapes/coke_*.png || exit 1
move_pictures signs_and_symbols        shapes/fire-ball_benji_park_01.png || exit 1
move_pictures tools                    shapes/forbici_e_pettine_sci_.png || exit 1
move_pictures decorations              shapes/golden_hug_joel_montes_d_.png || exit 1
move_pictures signs_and_symbols        shapes/gpride_jean_victor_balin_.png || exit 1
move_pictures signs_and_symbols        shapes/h_foot_print_jean_victor_.png || exit 1
move_pictures decorations              shapes/orleans_express_*.png || exit 1
move_pictures people                   shapes/shadow_babe_robert_seitz_01.png || exit 1
move_pictures people                   shapes/stripper_jarno_vasamaa_01r.png || exit 1
move_pictures people                   shapes/tangram_erwan_01.png || exit 1
move_pictures people                   shapes/user_lambda_jean_victor_.png || exit 1
move_pictures signs_and_symbols        shapes/arrows/reincarnate_nurbldoff_01r.png || exit 1
move_pictures electronics              signs_and_symbols/led/led_rainbow_ganson.png || exit 1
move_pictures electronics              signs_and_symbols/led/leds_and_rainbow.png || exit 1
# move_dir TARGET SOURCE: what is left in the led directory goes to shapes
move_dir      shapes                   signs_and_symbols/led || exit 1

# go back, the following steps may use a relative PNG_HOME
cd "$start_dir" || exit 1

echo
echo "Looking for duplicates..."
echo

# Scratch directory for the file lists.  The script stops when it cannot
# be created; with an empty TMP_DIR the lists would be written to "/".
TMP_DIR=`mktemp -d /tmp/openclipart-clean-up.XXXXXXXX` || exit 1
# remove the scratch directory also when the script bails out early
trap 'rm -rf -- "$TMP_DIR"' EXIT

# First, find filenames that are listed more times:
#   png_list_full   - every png file below the picture tree, with its path
#   png_list_double - the file names (without directory) that occur more
#                     than once, sorted, each of them listed once
find "${DESTDIR}${PNG_HOME}" -name "*.png" >"$TMP_DIR/png_list_full"
sed "s|^.*/||" "$TMP_DIR/png_list_full" | sort | uniq -d >"$TMP_DIR/png_list_double"

# Thereafter, clean up duplicates.
#
# Input: $TMP_DIR/png_list_double (file names found more than once) and
#        $TMP_DIR/png_list_full (all png files with their paths).
#
# Notes:
#    - two files with the same filename could differ in fact
#    - the same picture may be included in two different subdirectories;
#      we keep only one and make symlinks from the other locations,
#      for example
#          symlink: people/a_boy_plays_soccer_01.png -> ../recreation/sports/a_boy_plays_soccer_01.png
#    - one picture is in a directory and a duplicate is in a subdirectory of
#      this directory; we keep only the one with the more specific sorting,
#      for example:
#          delete: transportation/turn_ahead.png
#          keep:   transportation/roadsigns/turn_ahead.png
#    - NOTE(review): $filename is used as a regular expression by grep and
#      sed below, so a "." in the name matches any character
for filename in `cat "$TMP_DIR/png_list_double"` ; do

    # sorted list of the directories that contain a file of this name
    duplicate_paths_found=`grep "/$filename$" "$TMP_DIR/png_list_full" | sed "s|/$filename$||" | sort`

    # Pass 1: find one "primary" directory per distinct file content and
    # remove the copies whose directory is a parent of another copy.
    primary_paths=
    for candidate in $duplicate_paths_found ; do
        # we must generate a new list of primary paths each time because
        # the current candidate would be a better one
        primary_paths_new=
        found=0
        for prim_path in $primary_paths ; do
            if diff -q "$candidate/$filename" "$prim_path/$filename" >/dev/null ; then
                # files are the same
                #
                # The directories are compared as paths ("parent/..."), not
                # as plain strings, so "a/foo" is not taken for a parent of
                # "a/foobar".
                case "$prim_path" in
                    "$candidate"/*)
                        # the candidate is only a parent of the old primary
                        # path, so remove the candidate as an ugly duplicate
                        remove_picture "$candidate/$filename"
                        primary_paths_new="$primary_paths_new $prim_path"
                        ;;
                    *)
                        case "$candidate" in
                            "$prim_path"/*)
                                # the old primary path is only a parent of
                                # the candidate, so remove the old primary
                                # file as an ugly duplicate
                                remove_picture "$prim_path/$filename"
                                ;;
                        esac
                        # we always set the candidate as the new primary path
                        # if the candidate is not an ugly duplicate; this trick
                        # helps us to search for ugly duplicates (the paths in
                        # $duplicate_paths_found are sorted, so it should be
                        # enough to compare only two sequential paths, ...)
                        primary_paths_new="$primary_paths_new $candidate"
                        ;;
                esac
                found=1
            else
                primary_paths_new="$primary_paths_new $prim_path"
            fi
        done
        # add candidate to the list if no same primary file found
        test "$found" = "0" && primary_paths_new="$primary_paths_new $candidate"

        primary_paths="$primary_paths_new"
    done

    # Pass 2: replace the more reasonable (not removed) duplicates with
    # symlinks.  The name is kept in a variable of its own so that a helper
    # which changes the global $filename cannot derail this loop.
    png_name="$filename"
    for candidate in $duplicate_paths_found ; do
        test -f "$candidate/$png_name" || continue
        for prim_path in $primary_paths ; do
            if test "$candidate" != "$prim_path" && \
               diff -q "$candidate/$png_name" "$prim_path/$png_name" >/dev/null 2>&1 ; then
                link_picure "$prim_path" "$candidate" "$png_name"
            fi
        done
    done
done

# the file lists are not needed any longer
rm -rf -- "$TMP_DIR"

echo
echo "Removing mess (files that are not pictures)"
echo

# Delete every regular file that has neither the .png nor the .svg suffix.
# find hands the names directly to rm, so names with whitespace or glob
# characters are handled, and the suffixes are matched literally (a file
# named "foopng" is not taken for a picture).  find reports a failed rm
# through its exit status.
find "${DESTDIR}${PNG_HOME}" -type f ! -name "*.png" ! -name "*.svg" -exec rm -v -- {} + || exit 1