Here are some Excel macros that help to clean up data. Once cleaned, it’s easier to remove duplicates. (I used these to de-dupe a list exported from Outlook.)
Included is a rough version of MS Access’ Nz() function.
Public Sub SimplifyEmails()
' This subroutine scans a column, turning emails in this form:
' Joe Blow (joe@company.com)
' Into this form:
' joe@company.com
Dim Rng As Range
Set Rng = Application.Intersect(ActiveSheet.UsedRange, _
ActiveSheet.Columns(ActiveCell.Column))
Col = Rng.Column
N = 0
For R = Rng.Rows.Count To 2 Step -1
V = ActiveSheet.Cells(R, Col).Value
' Debug.Print V
If V <> Empty Then
If Nz(InStr(V, "(")) < Nz(InStr(V, ")")) _
And Nz(InStr(V, "(")) > 0 Then
Start = InStr(V, "(") + 1
Length = InStr(V, ")") - Start
newmail = "'" & Mid(V, Start, Length)
Debug.Print newmail
ActiveSheet.Cells(R, Col).Value = newmail
End If
End If
Next R
End Sub
Function Nz(a As Variant) As Variant
If IsNull(a) Then
Select Case a.Type
Case xlNumber
Nz = 0
Case Else
Nz = ""
End Select
Else
Nz = a
End If
End Function
Public Sub NormalizePhones()
Dim Rng As Range
Set Rng = Application.Intersect(ActiveSheet.UsedRange, _
ActiveSheet.Columns(ActiveCell.Column))
Col = Rng.Column
N = 0
For R = Rng.Rows.Count To 2 Step -1
V = ActiveSheet.Cells(R, Col).Value
' Debug.Print V
If V <> Empty Then
' first replace . with -
V = Replace(V, ".", "-")
' second if there's a dash in position 4, then turn it into parens
If InStr(V, "-") = 4 Then
V = "(" & Mid(V, 1, 3) & ") " & Mid(V, 5)
End If
' third strip any double spaces (replace with single space)
V = Replace(V, " ", " ")
' fourth if there's a space in position 4, then turn it into parens
If InStr(V, " ") = 4 Then
V = "(" & Mid(V, 1, 3) & ") " & Mid(V, 5)
End If
ActiveSheet.Cells(R, Col).Value = V
End If
Next R
End Sub
Public Sub TrimAllCells()
' removes leading and trailing spaces, and replaces double-spaces with single spaces
Dim Rng As Range
Set Rng = Application.Intersect(ActiveSheet.UsedRange, _
ActiveSheet.Columns(ActiveCell.Column))
Col = Rng.Columns.Count
N = 0
For R = Rng.Rows.Count To 2 Step -1
For C = Col To 2 Step -1
V = ActiveSheet.Cells(R, C).Value
If V <> Empty Then
' strip any double spaces (replace with single space)
V = Replace(V, " ", " ")
' ltrim and rtrim the data
V = LTrim(V)
V = RTrim(V)
ActiveSheet.Cells(R, C).Value = V
End If
Next C
Next R
End Sub