On an AMD Sempron 2000, the first search on 200,000+ files takes around 11 minutes. The second search takes around 2 seconds, and subsequent searches during a single run take around 1 second each. The first run builds the database, and the subsequent searches use it.
The file index is around 16MB, so it’s loaded into the system as a string, with no indexing tricks. Searches are simple regexs on the gigantic string. Searches return a Collection of strings.
This script was coded in Microsoft Excel’s VBA, but doesn’t use any Excel features.
If you need a fast, interactive file search, Locate32 looks nice. It’s even faster.
First file – create a new class, name it FindFile, and paste this code in
' add the Microsoft Scripting Runtime reference to the environemnt
Private fso As Scripting.FileSystemObject
Private databasePath As String
Private root As Drive
Private dbString As String
Private driveLetter As String
Private Sub Class_Initialize()
databasePath = "C:fdb.txt"
dbString = ""
End Sub
Public Sub SetDatabase(dbPath As String, dLetter As String)
databasePath = dbPath
driveLetter = dLetter
End Sub
Private Sub LoadDatabase()
If dbString = "" Then
Debug.Print "LoadDatabase from file " & databasePath
Set fso = New FileSystemObject
Set dbStream = fso.OpenTextFile(databasePath)
dbString = dbStream.ReadAll
Set dbStream = Nothing
Set fso = Nothing
End If
End Sub
' returns a collection of strings, one for each line that matched the substring
Public Function Find(substring As String) As Collection
' http://visualbasic.about.com/od/...
' http://www.regular-expressions.i...
Dim regex As RegExp
Dim matches As MatchCollection
Dim match As Object
Dim l As String
Dim result As Collection
Debug.Print "Finding " & Chr(34) & substring & Chr(34)
Call LoadDatabase
Set result = New Collection
Set regex = New RegExp
regex.Pattern = "^(.*" & substring & ".*)$"
regex.Global = True
regex.MultiLine = True
Debug.Print "Searching regex of " & regex.Pattern
Set matches = regex.Execute(dbString)
Debug.Print "Found " & matches.Count & " matches."
For Each match In matches
l = match.Value
' strip off trailing chars [chomp()]
If Right(l, 1) = Chr(13) Or Right(l, 1) = Chr(10) Then
l = Left(l, Len(l) - 1)
End If
If Right(l, 1) = Chr(13) Or Right(l, 1) = Chr(10) Then
l = Left(l, Len(l) - 1)
End If
result.Add (l)
Next
Set regex = Nothing
Set matches = Nothing
Set Find = result
End Function
Public Sub UpdateDatabaseIfOld()
Dim f As File
Set fso = New FileSystemObject
If fso.FileExists(databasePath) Then
Set f = fso.GetFile(databasePath)
' http://www.aspisfun.com/function...
If DateTime.DateDiff("d", f.DateLastModified, DateTime.Now) < 2 Then
Exit Sub
Else
Debug.Print "Date difference between file and now is " & DateTime.DateDiff("d", f.DateLastModified, DateTime.Now)
End If
Else
Debug.Print "No file at databasePath of " & databasePath
End If
Set fso = Nothing
Call UpdateDatabase ' gets here if it's old or nonexistent
End Sub
' usually, you should call UpdateDatabaseIfOld
Public Sub UpdateDatabase()
Dim dbStream As TextStream
Set fso = New FileSystemObject
If fso.FileExists(databasePath) Then
fso.DeleteFile (databasePath)
End If
Set dbStream = fso.CreateTextFile(databasePath)
Debug.Print ("scanning drive lettter " & driveLetter)
Set root = fso.Drives(driveLetter)
Call UpdateDatabaseRecurse(root.RootFolder, "", dbStream)
Call dbStream.Close
Set dbStream = Nothing
Set fso = Nothing
End Sub
Private Sub UpdateDatabaseRecurse(f As Folder, path As String, dbStream As TextStream)
Dim subf As Folder
Dim obj As File
Dim ln As String
For Each subf In f.SubFolders
' Debug.Print "folder " & root.driveLetter & ":" & path & subf.Name
On Error Resume Next
Call UpdateDatabaseRecurse(subf, path & subf.Name & "", dbStream)
Next
For Each obj In f.Files
ln = root.driveLetter & ":" & path & obj.Name
' Debug.Print "file " & ln
dbStream.WriteLine (ln)
Next
Set subf = Nothing
Set obj = Nothing
End Sub
Test case – save this to a module:
Sub test()
Dim c As Collection
Dim ff As FileFinder
Set ff = New FileFinder
Call ff.SetDatabase("C:fdb.txt", "c")
Call ff.UpdateDatabaseIfOld ' updates only if it's over a day old
Set c = ff.Find("passwd")
For Each s In c
Debug.Print s
Next
End Sub
Attachment | Size |
---|---|
FileFinder.cls | 4.01 KB |
Book1.xls | 32 KB |
driveGanalysis.pl.txt | 7.54 KB |