2 # Script Name: rmdupe http://igurublog.wordpress.com/downloads/script-rmdupe/
4 # License: GNU GENERAL PUBLIC LICENSE Version 3 http://www.gnu.org/licenses/gpl-3.0.txt
12 Usage: rmdupe [OPTIONS] FOLDER [...]
13 Removes duplicate files in specified folders. By default, newest duplicates
16 -R, -r search specified folders recursively
17 --ref FOLDER also search FOLDER recursively for copies but don't
18 remove any files from here (multiple --ref allowed)
19 Note: files may be removed from a ref folder if that
20 folder is also a specified folder
21 --trash FOLDER copy duplicate files to FOLDER instead of removing
22 --sim simulate and report duplicates only - no removal
23 --quiet minimize output (disabled if used with --sim)
24 --verbose detailed output
25 --old remove oldest duplicates instead of newest
26 --minsize SIZE limit search to duplicate files SIZE MB and larger
27 --maxsize SIZE limit search to duplicate files SIZE MB and smaller
28 --rmcmd "RMCMD" execute RMCMD instead of rm to remove copies
29 (may contain arguments, eg: "rm -f" or "shred -u")
30 --xdev don't descend to other filesystems when recursing
31 specified or ref folders
32 Notes: do not use wildcards; symlinks are not followed except on the
33 command line; zero-length files are ignored
35 Instructions and updates:"
36 http://igurublog.wordpress.com/downloads/script-rmdupe/
# Command-line parsing (excerpt: lines between the shown statements are elided).
# An argument whose first character is "-" is treated as an option; the later
# checks in this section validate a non-option argument as a folder to process.
# NOTE(review): every diagnostic in this section is written to stdout via echo,
# and `echo Option $1 ...` leaves $1 unquoted (subject to word splitting and
# globbing). `==` inside `[ ]` is a bash extension.
48 if [ "${1:0:1}" = "-" ]; then
# Option with a FOLDER value: the value must be non-empty and must not itself
# start with "-"; the folder must already exist (error text names it the
# "ref folder").
72 if [ "$2" == "" ] || [ "${2:0:1}" = "-" ]; then
73 echo Option $1 requires argument
76 if [ ! -d "$2" ]; then
77 echo "rmdupe: Error: ref folder \"$2\" does not exist"
# Trash-folder option: same value check; rejected if $trash is already set,
# so at most one trash folder can be given.
85 if [ "$2" == "" ] || [ "${2:0:1}" = "-" ]; then
86 echo Option $1 requires argument
89 if [ "$trash" != "" ]; then
90 echo "rmdupe: Error: only one trash folder allowed"
# Remove-command option: same value check; rejected if $rmcmd has already been
# changed from "rm" (presumably the default set on a line not shown here).
97 if [ "$2" == "" ] || [ "${2:0:1}" = "-" ]; then
98 echo Option $1 requires argument
100 elif [ "$rmcmd" != "rm" ]; then
101 echo "rmdupe: Error: only one rmcmd accepted"
# Minimum-size option: values below 1 are rejected, then the value is
# converted from MB to bytes.
108 if [ "$2" == "" ] || [ "${2:0:1}" = "-" ]; then
109 echo Option $1 requires argument
113 if (( minsize < 1 )); then
114 echo "rmdupe: Error: invalid minsize"
117 (( minsize = minsize * 1024 * 1024 ))
# Maximum-size option: same validation and MB-to-bytes conversion as minsize.
121 if [ "$2" == "" ] || [ "${2:0:1}" = "-" ]; then
122 echo Option $1 requires argument
126 if (( maxsize < 1 )); then
127 echo "rmdupe: Error: invalid maxsize"
130 (( maxsize = maxsize * 1024 * 1024 ))
134 echo Unknown option $1
# Folder arguments. Safety checks: refuse "/" and "/.", refuse relative paths
# (first character must be "/"), and require an existing directory.
139 if [ "$1" = "/" ] || [ "$1" = "/." ]; then
140 echo "rmdupe: Error: use on folder / not permitted for safety"
143 if [ "${1:0:1}" != "/" ]; then
144 echo "rmdupe: Error: relative folder spec not permitted for safety"
147 if [ ! -d "$1" ]; then
148 echo "rmdupe: Error: folder \"$1\" does not exist"
# Accepted folder is stored in the dfolder array at index dcount.
151 dfolder[$dcount]="$1"
# No folder was given (the action taken is on lines not shown here).
156 if (( dcount == 0 )); then
# A trash folder and a custom remove command are mutually exclusive.
159 if [ "$trash" != "" ] && [ "$rmcmd" != "rm" ]; then
160 echo "rmdupe: Error: can't specify both --trash and --rmcmd"
# removefile FILE
# Disposes of one duplicate file ($1) according to the current mode:
#   - trash mode ($trash non-empty): move the file into $trash under a name
#     that does not collide with an existing entry;
#   - simulation mode (optsim == 1): only record the file in the sremoved array;
#   - otherwise: remove it (the removal command itself is on a line not shown
#     in this excerpt; $rmcmd is echoed in verbose mode when it is not "rm").
# Globals read:    trash, optsim, optquiet, optverbose, rmcmd
# Globals written: removesize, trashfile, ext, sremoved[], sremovedcount,
#                  removecount, totalsize
# The visible counter updates add 1 to removecount and the file's byte size to
# totalsize; they appear after each error message, presumably on the success
# (else) branch - the branch keywords are elided here, so confirm.
164 removefile () { # $1=file
165 if (( optquiet != 1 )); then
# Size is captured before the file goes away so it can be added to totalsize.
168 removesize=$(stat -c%s "$1")
169 if [ "$trash" != "" ]; then
171 if (( optsim != 1 )); then
# Derive the name to use inside the trash folder. ext is the text after the
# last "."; if that equals the whole name, the name contains no ".".
173 trashfile="`basename "$1"`"
174 ext="${trashfile##*.}"
175 if [ "$ext" = "$trashfile" ]; then
# While the target name is taken, try "<name>-copy<num>" (no extension) or
# "<stem>.copy<num>.<ext>": basename with suffix $ext strips the extension but
# keeps the trailing ".". $num is maintained on a line not shown here.
179 while [ -e "$trash/$trashfile" ]; do
181 if [ "$ext" = "" ]; then
182 trashfile="`basename "$1"`""-copy$num"
184 trashfile="`basename "$1" "$ext"`""copy$num.$ext"
187 mv "$1" "$trash/$trashfile"
# Treated as failed if mv returned non-zero or the source path still exists.
188 if [ "$?" != "0" ] || [ -e "$1" ]; then
189 echo " rmdupe: Error: move to trash failed for copy $1"
191 (( removecount += 1 ))
192 ((totalsize += removesize ))
# Trash mode under simulation: remember the file as sim-removed and count it.
195 sremoved[$sremovedcount]="$1"
196 (( sremovedcount += 1 ))
197 (( removecount += 1 ))
198 ((totalsize += removesize ))
# Non-trash path: in verbose mode, show a custom remove command before use.
202 if (( optverbose == 1 )) && [ "$rmcmd" != "rm" ]; then
203 echo " > $rmcmd \"$1\""
205 if (( optsim == 1 )); then
206 sremoved[$sremovedcount]="$1"
207 (( sremovedcount += 1 ))
208 (( removecount += 1 ))
209 ((totalsize += removesize ))
215 echo " rmdupe: Error: removal failed of copy $1"
217 (( removecount += 1 ))
218 ((totalsize += removesize ))
# simcheck FILE
# Looks FILE ($1) up in the sremoved array, i.e. the list of files that were
# "removed" during a simulation run. The scan is a linear walk over indices
# srx = 0 .. sremovedcount-1 using exact string comparison.
# How the result is reported (return status or a variable) is on lines not
# shown in this excerpt; callers in this file test "$?" after what appear to be
# calls to this function - confirm against the full source.
224 simcheck () { # $1=file
225 # file has been sim-removed?
227 while (( srx < sremovedcount )); do
228 if [ "$1" = "${sremoved[$srx]}" ]; then
# checkdupe FILE [ref]
# Compares FILE ($1) byte-for-byte with the global $curfile and, when they are
# identical, removes one of them through removefile.
#   - With $2 = "ref" (reference mode) only $curfile is removed.
#   - Otherwise the two timestamps decide which one goes; optold selects
#     whether the older or the newer of the pair is removed.
# Pairs that turned out to differ are remembered in the compared array so the
# same pair is not compared again.
# Globals read:    curfile, optsim, optverbose, optold, refmsg, compared[]
# Globals written: test1, test2, crx, curage, otherage, compared[],
#                  comparedcount (plus whatever removefile updates)
236 checkdupe () { # $1=file $2="ref"(optional - reference mode); uses $curfile
237 # $1 or $curfile may be removed in non-reference mode
238 # $curfile may be removed in reference mode
239 # $curfile is set to empty if removed
# A file is never compared with itself.
241 if [ "$1" = "$curfile" ]; then
# In simulation mode nothing is really deleted, so both files still exist on
# disk. The two "$?" tests below follow commands on lines not shown here -
# presumably simcheck calls for each file; confirm.
245 if (( optsim == 1 )); then
247 if [ "$?" = "0" ]; then
249 if [ "$?" != "0" ]; then
# Both paths must still be regular files.
256 if [ -f "$1" ] && [ -f "$curfile" ]; then
257 if [ "$2" == "ref" ]; then
# Build the lookup key in both orders and scan the compared array for it.
263 test1="[$curfile][$1]"
264 test2="[$1][$curfile]"
266 while (( crx < comparedcount )); do
267 if [ "$test1" = "${compared[$crx]}" ] || [ "$test2" = "${compared[$crx]}" ]; then
# crx other than -1 leads to a fresh comparison; the matching branch further
# down prints "Already compared". Presumably crx is set to -1 on a hit inside
# the loop above (on a line not shown) - confirm.
273 if [ "$crx" != "-1" ]; then
275 if (( optverbose == 1 )); then
276 echo " Comparing to$refmsg: $1"
# cmp -s is silent; exit status 0 means the contents are identical.
278 cmp -s "$curfile" "$1"
279 if [ "$?" = "0" ]; then
281 if [ "$2" = "ref" ]; then
283 removefile "$curfile"
286 # non-ref mode - determine which is older
# %Z in GNU stat is the time of last status change (ctime) in seconds since
# the Epoch, not the modification time.
# NOTE(review): the "$?" test two lines below runs after the parameter-expansion
# assignment, so it reflects that assignment rather than stat; a stat failure
# is effectively detected only through the empty-string check.
288 curage=$(stat -c%Z "$curfile")
289 curage="${curage%.*}" # corrects .000000000 bug in stat
290 if [ "$?" != "0" ] || [ "$curage" = "" ]; then
291 echo " rmdupe: Error: get timestamp failed on $curfile"
# NOTE(review): same "$?" pattern as above for the other file.
294 otherage=$(stat -c%Z "$1")
295 otherage="${otherage%.*}" # corrects .000000000 bug in stat
296 if [ "$?" != "0" ] || [ "$otherage" = "" ]; then
297 echo " rmdupe: Error: get timestamp failed on $1"
# err is set on lines not shown here, presumably by the failure branches above.
300 if (( err == 0 )); then
# Smaller timestamp = older. $curfile is the older file in this branch, so it
# is the one removed when optold is set.
301 if (( curage < otherage )); then
303 if (( optold == 1 )); then
304 removefile "$curfile"
# Here $curfile is not older than $1; the visible removefile call sits after
# the optold test, presumably on its else side (the optold case is elided).
311 if (( optold == 1 )); then
314 removefile "$curfile"
321 # not a copy, remember compare
322 compared[$comparedcount]="[$curfile][$1]"
323 (( comparedcount += 1 ))
326 if (( optverbose == 1 )); then
327 echo " Already compared to$refmsg: $1"
# Main driver (excerpt: lines between the shown statements are elided).
# Steps visible here: prepare the trash folder, sanity-check the size limits,
# build the folder lists, enumerate candidate files, look for same-size files
# to compare through checkdupe, then print a summary.
# Trash folder set-up is skipped in simulation mode; the creation command is
# on a line not shown, and the -d test verifies the folder exists afterwards.
337 if [ "$trash" != "" ]; then
339 if (( optsim != 1 )); then
341 if [ ! -d "$trash" ]; then
342 echo "rmdupe: Error: trash folder $trash could not be created"
349 if (( optsim == 1 )); then
353 if (( optverbose == 1 )); then
# A value of 0 means "no limit" for either bound in this test.
356 if (( minsize != 0 )) && ((maxsize != 0 )) && (( minsize > maxsize )); then
357 echo "rmdupe: minsize greater than maxsize - nothing to do"
# Folder lists are flattened into strings of double-quoted paths so that they
# can be handed to `eval find` below.
# NOTE(review): because the strings are re-parsed by eval, a folder name
# containing a double quote, "$" or a backquote would be interpreted by the
# shell rather than passed literally.
362 while (( dx < dcount )); do
363 dfolders="$dfolders \"${dfolder[$dx]}\""
368 while (( rx < rcount )); do
369 rfolders="$rfolders \"${rfolder[$rx]}\""
374 # find all files in dfolders
# $md is set on lines not shown here, presumably a depth limit used when
# recursion is off - confirm.
375 if (( optrecurse == 1 )); then
380 flist=`eval find -H $dfolders $xdev $md -type f`
381 if [ "$flist" = "" ]; then
382 if (( optquiet != 1 )); then
383 echo "rmdupe: no files found in specified folders - nothing to do"
388 # check for dupes of files
# In simulation mode the "$?" test follows a command on a line not shown,
# presumably a simcheck call that decides simremoved - confirm.
394 if (( optsim == 1 )); then
396 if [ "$?" != "0" ]; then
400 if [ -f "$f" ] && (( simremoved == 0 )); then
401 fsize=$(stat -c%s "$f")
# Size filter; a limit of 0 disables the corresponding bound.
404 if (( minsize > 0 )) && (( fsize < minsize )); then
406 elif (( maxsize > 0 )) && (( fsize > maxsize )); then
# NOTE(review): several statements sit between the stat call and this "$?"
# test, so "$?" here likely does not reflect stat's exit status; the empty and
# zero checks on fsize are what actually catch problems - confirm.
409 if [ "$?" != "0" ] || [ "$fsize" = "" ] || (( fsize == 0 )); then
410 if [ "$fsize" != "0" ]; then
411 echo "rmdupe: Error: get filesize failed on $f"
412 elif (( optverbose == 1 )); then
413 echo "Ignoring zero-length file: $f"
415 elif (( sizeok == 1 )); then
416 if (( optverbose == 1 )); then
417 echo "Searching for copies of: $f"
# Candidates are narrowed with find -size; $fsizec is built on a line not
# shown, presumably the byte count with find's "c" suffix - confirm.
420 if [ "$dfolders" != "" ]; then # safety
422 dlist=`eval find -H $dfolders $xdev $md -type f -size $fsizec`
423 if [ "$dlist" != "" ]; then
425 checkdupe "$df" # may remove $curfile
426 if [ "$curfile" = "" ]; then
# NOTE(review): the leading "X" in the message below looks like leftover debug
# text - confirm before changing, since it is user-visible output.
428 if (( optverbose == 1 )); then
429 echo "XSearching for copies of: $curfile"
# Reference folders: this find call does not include $md, unlike the ones
# above; files found here are only compared against, in "ref" mode.
436 if [ "$rfolders" != "" ]; then
437 rlist=`eval find -H $rfolders $xdev -type f -size $fsizec`
438 if [ "$rlist" != "" ]; then
440 checkdupe "$rf" "ref" # may remove $curfile
441 if [ "$curfile" = "" ]; then
447 elif (( optverbose == 1 )); then
448 echo "Skipping $fsize-byte file $f"
# Summary. The wording depends on how many files were handled and on whether
# trash or simulation mode is active; $rmmsg is set on lines not shown.
455 if (( optquiet != 1 )); then
456 if (( removecount == 0 )); then
457 echo "No duplicates were found"
459 elif (( removecount == 1 )); then
462 msg="duplicates were"
464 if [ "$trash" != "" ]; then
# Integer division: bytes to whole MB, truncated.
469 (( totalsize = totalsize / 1024 / 1024 ))
471 if (( optsim == 1 )); then
472 echo "$removecount $msg sim-$rmmsg (approx $totalsize MB)"
474 echo "$removecount $msg $rmmsg (approx $totalsize MB)"
481 # 1.0.4: fixed failure to remove files whose names contain special characters #2 #3
482 # 1.0.3: corrected for stat %Z .000000000 bug