An Outlook Script to Save Spam for Training Spamassassin

On the spamassassin box, set up Samba and create two shares spamassassin-spam and spamassassin-ham. I created them in /home/Spamassassin/

Then, set up cron to run this script periodically. This script is a little weird looking, but it tries to do work in small chunks, because learning takes a bit of time. You don’t want the learning to run so long that the cron ends up running two copies of this job, basically breaking everything:

#! /bin/bash

if (test -e /tmp/deletespamnexttime) then
        rm /tmp/deletespamnexttime
        rm /tmp/spam

if (test -e /tmp/spam) then
        touch /tmp/deletespamnexttime

cd /home/Spamassassin/spam
ls > /tmp/spam
head -40 /tmp/spam | xargs --verbose -L 1 -d \n sa-learn --spam
head -40 /tmp/spam | xargs -L 1 -d \n -I _ rm -f -- "_"

cd /home/Spamassassin/ham
ls > /tmp/ham
head -40 /tmp/ham | xargs --verbose -L 1 -d \n sa-learn --ham
head -40 /tmp/ham | xargs -L 1 -d \n -I _ rm -f -- "_"

rm /tmp/spam
rm /tmp/ham

Then, in Outlook, add these macros. The WriteAsFile subroutine reads an email message and converts it to an EML file, which Spamassassin can read. You should then create an icon to run the MarkAsSpam macro. To use it, control-select any spam messages, and click the icon. The messages will be moved out to \saspamassassin-spam.

Sub MarkAsHam()
    CopyMessagesToFile ("\saspamassassin-ham")
End Sub

Sub MarkAsSpam()
    CopyMessagesToFile ("\saspamassassin-spam")
End Sub

' Move the selected message(s) to the given folder **************************
Function CopyMessagesToFile(folderName As String)

    Dim myOLApp As Application
    Dim myNameSpace As NameSpace
    Dim myInbox As MAPIFolder
    Dim currentMessage As MailItem
    Dim errorReport As String
    Set myOLApp = CreateObject("Outlook.Application")
    Set myNameSpace = myOLApp.GetNamespace("MAPI")
    Set myInbox = myNameSpace.GetDefaultFolder(olFolderInbox)

    ' Figure out if the active window is a list of messages or one message
    ' in its own window
    On Error GoTo QuitIfError    ' But if there's a problem, skip it
    Select Case myOLApp.ActiveWindow.Class
        ' The active window is a list of messages (folder); this means there
        ' might be several selected messages
        Case olExplorer
            Debug.Print "list of messages"
            For Each currentMessage In myOLApp.ActiveExplorer.Selection
                Call writeAsFile(folderName, currentMessage)
        ' The active window is a message window, meaning there will only
        ' be one selected message (the one in this window)
        Case olInspector
            Call writeAsFile(folderName, myOLApp.ActiveInspector.CurrentItem)
        ' can't handle any other kind of window; anything else will be ignored
    End Select
QuitIfError:       ' Come here if there was some kind of problem
    Set myOLApp = Nothing
    Set myNameSpace = Nothing
    Set myInbox = Nothing
    Set currentMessage = Nothing
End Function

Sub writeAsFile(folderName As String, item As MailItem)
    On Error GoTo Bail
    Dim x As MailItem
    Dim fn As String
    Set x = item
    Let fn = folderName & Right(x.EntryID, 64) & ".eml"
    Debug.Print "file will be " & fn
    Open fn For Output As #2
        Print #2, "From : " & x.SenderEmailAddress
        Print #2, "To: " & x.To
        Print #2, "Subject: " & x.Subject
        Print #2, "MIME-Version: 1.0"
        Print #2, "Content-Type: multipart/alternative;"
        Print #2, "        boundary = ""----=_NextPart_000_000D_01CCF6AD.D1159750"""
        Print #2, "Content-Language: en-us"
        Print #2, ""
        Print #2, "This is a multipart message in MIME format."
        Print #2, ""
        Print #2, "------=_NextPart_000_000D_01CCF6AD.D1159750"
        Print #2, "Content-Type: text/plain;"
        Print #2, "        Charset = ""us-ascii"""
        Print #2, "Content-Transfer-Encoding: 7bit"
        Print #2, ""
        Print #2, item.body
        Print #2, "------=_NextPart_000_000D_01CCF6AD.D1159750"
        Print #2, "Content-Type: text/html;"
        Print #2, "        Charset = ""UTF-8"""
        Print #2, "Content-Transfer-Encoding: 7-bit"
        Print #2, "Content-Disposition: inline"
        Print #2, ""
        Print #2, item.HTMLBody
        Print #2, "------=_NextPart_000_000D_01CCF6AD.D1159750--"
    On Error GoTo 0

    Close #1
    Close #2
    Set item = Nothing
End Sub