Removing Maildir Duplicates
Jump to navigation
Jump to search
Sample Bash / Awk script. It is not complete yet:
- File removal is not implemented (the script only lists the files it would delete)
- Command-line options still need to be added
#!/bin/bash
#
# Lists duplicate messages in the Maildir tree below the current directory.
#
# Every regular file located under a "cur" directory is scanned for its
# Message-Id header. Messages sharing the same Message-Id are duplicates:
#   - duplicates whose record matches $rmIfMatch are listed under
#     "WILL BE DELETED" (file paths only);
#   - Message-Ids that still have several copies once those records are set
#     aside are listed under "DUPLICATE NOT DELETED AUTOMATICALLY".
# Nothing is deleted: the script only prints the two lists.
#
# Usage: run it from the top of the Maildir tree, without arguments.

set -u

# Column width reserved for the part of the Message-Id before the "@".
# Messages whose id-left part is longer are reported on stderr and skipped.
maxIDLen=60
# Extended regex selecting the duplicates that may be removed automatically.
# It is spliced into a sed address, hence the escaped "/". It is matched
# against the whole record (Message-Id columns and file path).
rmIfMatch="\/.titi\/"
# Scratch directory; created by main, removed by cleanup.
tmpDir=""

# Prints a message on stderr and aborts the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Removes the scratch directory, if one exists.
cleanup() {
  if [[ -n "$tmpDir" ]]; then
    rm -rf -- "$tmpDir"
    tmpDir=""
  fi
}

# Prints the value of the first Message-Id header of the message file $1,
# with runs of whitespace collapsed; prints nothing when there is none.
# Only the header section (everything before the first empty line) is
# searched, so quoted headers in the body are ignored, and a value folded
# onto continuation lines is unfolded.
extract_message_id() {
  awk '
    /^\r?$/ { exit }                      # end of the header section
    inHeader {
      if ($0 !~ /^[ \t]/) exit            # next header: value is complete
      value = value " " $0                # folded continuation line
      next
    }
    tolower($0) ~ /^message-id[ \t]*:/ {
      value = $0
      sub(/^[^:]*:/, "", value)           # cut at the FIRST colon only
      inHeader = 1
    }
    END {
      gsub(/[ \t\r]+/, " ", value)
      sub(/^ /, "", value)
      sub(/ $/, "", value)
      if (value != "") print value
    }
  ' < "$1"
}

# Writes to stdout one record per message found under the "cur"
# directories of the current directory:
#   <id-left padded to maxIDLen><id-right>"<><>"<file path>
# Messages without a Message-Id are skipped with a warning: giving them an
# empty key would make them all look like duplicates of each other.
collect_records() {
  local file msgID idLeft idRight
  # NUL-delimited paths: file and folder names may contain whitespace.
  while IFS= read -r -d '' file; do
    msgID=$(extract_message_id "$file")
    if [[ -z "$msgID" ]]; then
      printf '***WARNING*** - No Message-Id header, file skipped: %s\n' "$file" >&2
      continue
    fi
    idLeft=${msgID%%@*}
    if [[ "$msgID" == *@* ]]; then
      idRight=${msgID#*@}
    else
      idRight=""
    fi
    if (( ${#idLeft} > maxIDLen )); then
      printf '***ERROR*** - Message ID too long: %s\n' "$idLeft" >&2
      continue
    fi
    printf '%-*s%s<><>%s\n' "$maxIDLen" "$idLeft" "$idRight" "$file"
  done < <(find . -type f -path "*/cur/*" -print0)
}

# Filters sorted records read from stdin, keeping every record whose
# Message-Id columns (the text before the last "<><>") are identical to
# those of an adjacent record. Both id-left and id-right are compared, so
# two ids differing only after the "@" are not reported as duplicates.
print_duplicates() {
  awk '
    {
      key = $0 ""
      if (match($0, /.*<><>/)) key = substr($0, 1, RLENGTH - 4)
      if (NR > 1 && key == prevKey) {
        if (!prevShown) print prevLine
        print
        prevShown = 1
      } else {
        prevShown = 0
      }
      prevKey = key
      prevLine = $0
    }
  '
}

# Builds the record lists in a private scratch directory and prints the
# report on stdout.
main() {
  tmpDir=$(mktemp -d "${TMPDIR:-/tmp}/maildir-dup.XXXXXX") \
    || die "***ERROR*** - Cannot create a temporary directory"
  trap cleanup EXIT

  local tmpFile="$tmpDir/ids"
  local tmpFileSorted=${tmpFile}_sorted
  local tmpFileUnique=${tmpFile}_dup
  local tmpFileRemove=${tmpFile}_remove
  local tmpFileNotRemoved=${tmpFile}_notRemoved
  local tmpFileNotify=${tmpFile}_notify
  local separator="----------------------------------"

  collect_records > "$tmpFile"
  # Byte-order sort: records sharing a Message-Id must end up adjacent
  # whatever the locale collation rules are.
  LC_ALL=C sort "$tmpFile" > "$tmpFileSorted"
  print_duplicates < "$tmpFileSorted" > "$tmpFileUnique"
  # NOTE(review): when every copy of a message matches $rmIfMatch, all of
  # its copies are listed for deletion; revisit before implementing removal.
  sed -r "/$rmIfMatch/!d; s/^.*<><>//" "$tmpFileUnique" > "$tmpFileRemove"
  sed -r "/$rmIfMatch/d" "$tmpFileUnique" > "$tmpFileNotRemoved"
  print_duplicates < "$tmpFileNotRemoved" > "$tmpFileNotify"

  # Debugging aids, disabled by default:
  #printf '%s\n' "$separator" "MESSAGE-ID LIST" "$separator"
  #cat "$tmpFile"
  #printf '%s\n' "$separator" "MESSAGE-ID LIST - SORTED" "$separator"
  #cat "$tmpFileSorted"
  #printf '%s\n' "$separator" "MESSAGE-ID LIST - DUPLICATE ID ONLY" "$separator"
  #cat "$tmpFileUnique"

  printf '%s\n' "$separator" "MESSAGE-ID LIST - WILL BE DELETED" "$separator"
  cat "$tmpFileRemove"
  printf '%s\n' "$separator" \
    "MESSAGE-ID LIST - DUPLICATE NOT DELETED AUTOMATICALLY" "$separator"
  cat "$tmpFileNotify"

  cleanup
}

main "$@"