Removing Maildir Duplicates

From miki
Revision as of 22:49, 30 January 2009 by Mip (talk | contribs) (New page: Sample Bash / Awk script. Not complete yet: * File removal not done * Complete with command-line options <source lang="bash"> #!/bin/bash maxIDLen=60 rmIfMatch="\/.titi\/" tmpFile="/tm...)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

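Sample Bash / Awk script that lists duplicate messages in a Maildir. It is not complete yet:

  • File removal is not implemented: the script only lists the files it would delete
  • Command-line options still need to be added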
#!/bin/bash
#
# Configuration and scratch-file setup for the Maildir duplicate finder.

# Width of the fixed-size key column of the message index. The part of a
# Message-ID before the "@" is padded to this width, and duplicates are
# detected by comparing only this many leading characters of each index line.
maxIDLen=60

# sed extended regex selecting the duplicate copies that are listed for
# deletion. Slashes are escaped because the pattern is embedded in a sed
# /.../ address; the dot is escaped so it only matches a literal ".".
rmIfMatch='\/\.titi\/'

# Index of "<padded id><domain><><><file>" lines, one per message. mktemp
# creates the (empty) file atomically under an unpredictable name, instead of
# the guessable /tmp/<pid>.
tmpFile=$(mktemp /tmp/maildirdup.XXXXXX) || {
        echo "***ERROR*** - Cannot create temporary file" >&2
        exit 1
}

# Remove the index and every derived "${tmpFile}_*" file when the script
# exits, so nothing is left behind if it stops half-way.
removeTmpFilesOnExit() {
        if [[ -n "$tmpFile" ]]; then
                rm -f -- "$tmpFile" "$tmpFile"_*
        fi
}
trap removeTmpFilesOnExit EXIT

# Print the value of the Message-ID header of the mail file $1, or nothing
# when the header section has no such header. Only the header section (up to
# the first blank line) is examined, the header name is matched
# case-insensitively, and a value folded onto continuation lines is joined.
getMsgID() {
        awk '
        # A continuation line (leading blank) extends a folded header value.
        grab && /^[ \t]/ { id = id $0; next }
        # Stop once the header has been read, or at the blank line ending the
        # header section, so body text is never mistaken for a header.
        grab || /^\r?$/ { exit }
        tolower($0) ~ /^message-id[ \t]*:/ {
                id = $0
                sub(/^[^:]*:/, "", id)
                grab = 1
        }
        END {
                sub(/^[ \t\r]+/, "", id)
                sub(/[ \t\r]+$/, "", id)
                if (id != "")
                        print id
        }' "$1"
}

# Append one index line per message found in a "cur" directory below the
# current directory to $tmpFile. Each line is
#   <id part before "@", padded to maxIDLen><id part after "@"><><><file path>
# Messages without a Message-ID are skipped with a warning: indexing them
# under an empty key would make them all look like duplicates of each other.
# IDs whose part before "@" is longer than maxIDLen are reported on stderr
# and not indexed.
indexMessages() {
        local filename msgID

        # NUL-delimited find output keeps paths containing blanks intact.
        while IFS= read -r -d '' filename; do
                msgID=$(getMsgID "$filename")
                if [[ -z "$msgID" ]]; then
                        printf '***WARNING*** - No Message ID found: %s\n' "$filename" >&2
                        continue
                fi

                # The path goes through the environment because "-v" would
                # interpret backslash escapes it may contain.
                printf '%s\n' "$msgID" | MSG_FILE="$filename" awk -F @ -v maxIDLen="$maxIDLen" '
                {
                        if (length($1) > maxIDLen)
                        {
                                printf "***ERROR*** - Message ID too long: %s\n", $1 > "/dev/stderr"
                        }
                        else
                        {
                                printf "%-*s%s<><>%s\n", maxIDLen, $1, $2, ENVIRON["MSG_FILE"]
                        }
                }' >> "$tmpFile"
        done < <(find . -type f -path "*/cur/*" -print0)
}

indexMessages

# Split the message index $tmpFile into the derived lists used by the report:
#   tmpFileSorted     - the index, sorted
#   tmpFileUnique     - index lines whose key (first maxIDLen characters)
#                       occurs more than once, i.e. the duplicated messages
#   tmpFileNotRemoved - duplicate lines that do NOT match rmIfMatch
#   tmpFileRemove     - paths of duplicate copies that match rmIfMatch and
#                       are safe to delete
#   tmpFileNotify     - lines of tmpFileNotRemoved that are still duplicated
# A copy matching rmIfMatch is only listed for removal when another copy of
# the same message survives; when every copy matches, the first one (in sort
# order) is kept so that a message is never deleted entirely.
# NOTE(review): only the part of the Message-ID before "@" takes part in the
# comparison, so IDs differing only in the part after "@" count as duplicates.
classifyDuplicates() {
        tmpFileSorted=${tmpFile}_sorted
        tmpFileUnique=${tmpFile}_dup
        tmpFileRemove=${tmpFile}_remove
        tmpFileNotRemoved=${tmpFile}_notRemoved
        tmpFileNotify=${tmpFile}_notify

        if [[ ! -r "$tmpFile" ]]; then
                printf '***ERROR*** - Message index not readable: %s\n' "$tmpFile" >&2
                return 1
        fi

        # Byte-wise ordering guarantees that lines sharing the same key end up
        # adjacent, which "uniq -w" relies on; locale collation does not.
        LC_ALL=C sort "$tmpFile" > "$tmpFileSorted"
        LC_ALL=C uniq -D -w "$maxIDLen" "$tmpFileSorted" > "$tmpFileUnique"

        sed -r "/$rmIfMatch/d" "$tmpFileUnique" > "$tmpFileNotRemoved"

        sed -r "/$rmIfMatch/!d" "$tmpFileUnique" |
                LC_ALL=C awk -v keyLen="$maxIDLen" '
                # First input: copies that stay; remember their keys.
                pass == 1 { kept[substr($0, 1, keyLen)] = 1; next }
                # Second input: copies matching the removal pattern.
                {
                        key = substr($0, 1, keyLen)
                        if (!(key in kept)) {
                                # No other copy survives: keep this one.
                                kept[key] = 1
                                next
                        }
                        # Strip everything up to the separator, leaving the path.
                        sub(/^.*<><>/, "")
                        print
                }' pass=1 "$tmpFileNotRemoved" pass=2 - > "$tmpFileRemove"

        LC_ALL=C uniq -D -w "$maxIDLen" "$tmpFileNotRemoved" > "$tmpFileNotify"
}

classifyDuplicates

# Print the two result lists: the files selected for deletion
# ($tmpFileRemove) and the duplicates that were not selected
# ($tmpFileNotify). The commented-out sections dump the intermediate lists
# and can be re-enabled for debugging.
printReport() {
        #echo "----------------------------------"
        #echo "MESSAGE-ID LIST"
        #echo "----------------------------------"
        #cat "$tmpFile"

        #echo "----------------------------------"
        #echo "MESSAGE-ID LIST - SORTED"
        #echo "----------------------------------"
        #cat "$tmpFileSorted"

        #echo "----------------------------------"
        #echo "MESSAGE-ID LIST - DUPLICATE ID ONLY"
        #echo "----------------------------------"
        #cat "$tmpFileUnique"

        echo "----------------------------------"
        echo "MESSAGE-ID LIST - WILL BE DELETED"
        echo "----------------------------------"
        cat "$tmpFileRemove"

        echo "----------------------------------"
        echo "MESSAGE-ID LIST - DUPLICATE NOT DELETED AUTOMATICALLY"
        echo "----------------------------------"
        cat "$tmpFileNotify"
}

# Delete the message index and every file derived from it ("${tmpFile}_*").
# Does nothing when tmpFile is empty: the glob would otherwise degrade to
# "_*" and delete unrelated files in the current directory.
removeTmpFiles() {
        [[ -n "$tmpFile" ]] || return 0
        rm -f -- "$tmpFile" "$tmpFile"_*
}

printReport
removeTmpFiles