Learn VisualBasic.NET with Me: (s)locate files on a disk with VBA

This is a class that will help you find files on the disk, without hitting the disk too much. It’s a simplified unix “slocate” library. The first time you use it, it creates an index of all the files on the drive. (Subsequent uses update the file when a day passes.) The index is loaded into memory, and searched with regular expressions.

On an AMD Sempron 2000, the first search on 200,000+ files takes around 11 minutes. The second search takes around 2 seconds, and subsequent searches during a single run take around 1 second each. The first run builds the database, and the subsequent searches use it.

The file index is around 16MB, so it’s loaded into the system as a string, with no indexing tricks. Searches are simple regexs on the gigantic string. Searches return a Collection of strings.

This script was coded in Microsoft Excel’s VBA, but doesn’t use any Excel features.

If you need a fast, interactive file search, Locate32 looks nice. It’s even faster.

First file – create a new class, name it FindFile, and paste this code in


' add the Microsoft Scripting Runtime reference to the environemnt
Private fso As Scripting.FileSystemObject
Private databasePath As String
Private root As Drive
Private dbString As String
Private driveLetter As String
Private Sub Class_Initialize()
    databasePath = "C:fdb.txt"
    dbString = ""
End Sub
Public Sub SetDatabase(dbPath As String, dLetter As String)
    databasePath = dbPath
    driveLetter = dLetter
End Sub
Private Sub LoadDatabase()
    If dbString = "" Then
        Debug.Print "LoadDatabase from file " & databasePath
        Set fso = New FileSystemObject
        Set dbStream = fso.OpenTextFile(databasePath)
        dbString = dbStream.ReadAll
        Set dbStream = Nothing
        Set fso = Nothing
    End If
End Sub
' returns a collection of strings, one for each line that matched the substring
Public Function Find(substring As String) As Collection
    ' http://visualbasic.about.com/od/...
    ' http://www.regular-expressions.i...
    Dim regex As RegExp
    Dim matches As MatchCollection
    Dim match As Object
    Dim l As String
    Dim result As Collection
    
    Debug.Print "Finding " & Chr(34) & substring & Chr(34)
    Call LoadDatabase
    
    Set result = New Collection
    
    Set regex = New RegExp
    regex.Pattern = "^(.*" & substring & ".*)$"
    regex.Global = True
    regex.MultiLine = True
    
    Debug.Print "Searching regex of " & regex.Pattern
    
    Set matches = regex.Execute(dbString)
    Debug.Print "Found " & matches.Count & " matches."
    For Each match In matches
        l = match.Value
        ' strip off trailing chars [chomp()]
        If Right(l, 1) = Chr(13) Or Right(l, 1) = Chr(10) Then
            l = Left(l, Len(l) - 1)
        End If
        If Right(l, 1) = Chr(13) Or Right(l, 1) = Chr(10) Then
            l = Left(l, Len(l) - 1)
        End If
        result.Add (l)
    Next
    Set regex = Nothing
    Set matches = Nothing
    Set Find = result
End Function
Public Sub UpdateDatabaseIfOld()
    Dim f As File
    Set fso = New FileSystemObject
    If fso.FileExists(databasePath) Then
        Set f = fso.GetFile(databasePath)
        ' http://www.aspisfun.com/function...
        If DateTime.DateDiff("d", f.DateLastModified, DateTime.Now) < 2 Then
            Exit Sub
        Else
            Debug.Print "Date difference between file and now is " & DateTime.DateDiff("d", f.DateLastModified, DateTime.Now)
        End If
    Else
        Debug.Print "No file at databasePath of " & databasePath
    End If
    Set fso = Nothing
    Call UpdateDatabase ' gets here if it's old or nonexistent
End Sub
' usually, you should call UpdateDatabaseIfOld
Public Sub UpdateDatabase()
    Dim dbStream As TextStream
    Set fso = New FileSystemObject
    If fso.FileExists(databasePath) Then
        fso.DeleteFile (databasePath)
    End If
    Set dbStream = fso.CreateTextFile(databasePath)
    Debug.Print ("scanning drive lettter " & driveLetter)
    Set root = fso.Drives(driveLetter)
    Call UpdateDatabaseRecurse(root.RootFolder, "", dbStream)
    Call dbStream.Close
    Set dbStream = Nothing
    Set fso = Nothing
End Sub
Private Sub UpdateDatabaseRecurse(f As Folder, path As String, dbStream As TextStream)
    Dim subf As Folder
    Dim obj As File
    Dim ln As String
    For Each subf In f.SubFolders
        ' Debug.Print "folder " & root.driveLetter & ":" & path & subf.Name
        On Error Resume Next
            Call UpdateDatabaseRecurse(subf, path & subf.Name & "", dbStream)
    Next
    For Each obj In f.Files
        ln = root.driveLetter & ":" & path & obj.Name
        ' Debug.Print "file " & ln
        dbStream.WriteLine (ln)
    Next
    Set subf = Nothing
    Set obj = Nothing
End Sub

Test case – save this to a module:


Sub test()
    Dim c As Collection
    Dim ff As FileFinder
    Set ff = New FileFinder
    Call ff.SetDatabase("C:fdb.txt", "c")
    Call ff.UpdateDatabaseIfOld ' updates only if it's over a day old
    Set c = ff.Find("passwd")
    For Each s In c
        Debug.Print s
    Next
End Sub
Attachment Size
FileFinder.cls 4.01 KB
Book1.xls 32 KB
driveGanalysis.pl.txt 7.54 KB