Tek-Tips is the largest IT community on the Internet today!

Members share and learn making Tek-Tips Forums the best source of peer-reviewed technical information on the Internet!

  • Congratulations IamaSherpa on being selected by the Tek-Tips community for having the most helpful posts in the forums last week. Way to Go!

Copying biggest files first recursively

Status
Not open for further replies.

Leozack

MIS
Oct 25, 2002
867
GB
Hi all - I've been banging my head in this morning trying to solve a task I can't believe no one has wanted to do before - that being to copy a directory & subdirs & all files over to another location but strictly copying the BIGGEST files first. Why? Because, from what I can see, this will help stop fragmenting big files being copied to USB flash memory due to the way it seems to copy the file then move it, leaving a gap the size of the file. Bigger files can't fit in that gap so make their own. ETc etc. End result - biggest first SHOULD hopefully mean 1 gap used for all copies and the resulting files all lined up contiguously after it.

So this is what I've got so far - 2 problems to fix :
1 - only 1 level of directory is made on the destination path if it doesn't exist - I need it to make as many as necessary that don't exist yet
2 - when the first copy starts it says "not enough space" even though there is like 30g left on this device to copy a 4g file.

All input welcome!

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
Whilst a USB can and does get fragmented this does not affect performance in the same way as a normal HDD, since a USB is a solid state random access device, rather than a physical serial device.
 
This is down to wanting contiguous iso/image files, not due to anal retentativeness for non-fragmented files :p
Looking forward to any help. Looking around now for vbs stuff to do with copying file trees whole, to help.

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
Just realised I totally forgot to paste in what I have so far ...
Code:
strPath = "C:\Data\Images\"
strDestPath = "E:\"
Set DataList = CreateObject("ADODB.Recordset")
DataList.Fields.Append "strFilePath", 200, 255 ' adVarChar
DataList.Fields.Append "strFileName", 200, 255 ' adVarChar
DataList.Fields.Append "strFileSize", 3, 4 ' adDouble
DataList.Open

Set objFSO = CreateObject("Scripting.FileSystemObject")
Set objFolder = objFSO.GetFolder(strPath)

If Right(strPath, 1) = "\" Then strPath = Left(strPath, Len(strPath) - 1)
If Right(strDestPath, 1) = "\" Then strDestPath = Left(strDestPath, Len(strDestPath) - 1)

'wscript.echo strPath & " " & strDestPath

For Each objFile In objFolder.Files
 Call ListFile (objFile, objFolder)
Next

DoSubfolders objFSO.GetFolder(strPath)

DataList.Sort = "strFileSize DESC"
DataList.MoveFirst
Do Until DataList.EOF
 strFilePath = DataList.Fields.Item("strFilePath")
 strFile = DataList.Fields.Item("strFileName")
 strFileName = DataList.Fields.Item("strFileSize")
 strFileSizeLG = Len(strFileSize)
 intPadding = 15 - strFileSizeLG
 strDisplayName = strFile & Space(intPadding)

 'wscript.echo strFilePath & "\" & strFile & " == " & strDestPath & Replace(strFilePath,strPath,"") & "\" & strFile
 'wscript.echo strFilePath & "\" & strFile & "," & strDestPath & Replace(strFilePath,strPath,"") & "\"

 If Not(objFSO.FileExists(strDestPath & Replace(strFilePath,strPath,"") & "\" & strFile)) Then
  If Not(objFSO.FolderExists(strDestPath & Replace(strFilePath,strPath,"") & "\")) Then
   objFSO.CreateFolder strDestPath & Replace(strFilePath,strPath,"")
  End If
  wscript.echo strFilePath & "\" & strFile, strDestPath & Replace(strFilePath,strPath,"") & "\"
  objFSO.CopyFile strFilePath & "\" & strFile, strDestPath & Replace(strFilePath,strPath,"") & "\",True
 End If
 DataList.MoveNext
Loop

Sub DoSubFolders(Folder)
    For Each Subfolder in Folder.SubFolders
        Set objFolder = objFSO.GetFolder(Subfolder.Path)
        Set colFiles = objFolder.Files
        For Each objFile in colFiles
            Call ListFile (objFile, objFolder)
        Next
        DoSubFolders Subfolder
    Next
End Sub

Sub ListFile (objFile, objFolder)
 DataList.AddNew
 DataList("strFilePath") = objFSO.GetAbsolutePathName(objFolder)
 DataList("strFileName") = objFile.Name
 DataList("strFileSize") = Int(objFile.Size/1000)
 If DataList("strFileSize") = 0 Then DataList("strFileSize") = 1
 DataList.Update
End Sub

Set DataList = Nothing : Set objFSO = Nothing : Set objFolder = Nothing

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
Ok so I'm now using a new attempt, though it still won't copy due to lack of space (which my original script said, which is rubbish, it has 30g free (though it's fat32...windows will copy the files on np)
Code:
Const strRootPath = "C:\Data\Images\"
Const strDestPath = "E:\"
Const dictKey = 1
Const dictItem = 2
dim tmp

Dim oFSO, oDict

Main

Sub Main()
    Set oFSO = CreateObject("Scripting.FileSystemObject")
    Set oDict = CreateObject("Scripting.Dictionary")
    ProcessFolder strRootPath
    CopyBiggestFirst
    Set oDict = Nothing
    Set oFSO = Nothing
End Sub

Sub ProcessFolder(sFDR)
    Dim oFDR, oFile
    For Each oFile In oFSO.GetFolder(sFDR).Files
        'Wscript.Echo oFile.Size & vbTab & oFile.Path
		tmp = Int(oFile.Size/1000)
		if tmp = 0 Then tmp = 1
        oDict.Add oFile.Path, tmp ' Key: FilePath, Value: Size
    Next
    For Each oFDR In oFSO.GetFolder(sFDR).SubFolders
        ProcessFolder (oFDR.Path)
    Next
End Sub

Sub CopyBiggestFirst()
    Dim oKeys, oItems, sFileSrc, sFileDst
    'Wscript.Echo vbCrLf & "CopyBiggestFirst()"
    SortDictionary oDict, dictItem
    oKeys = oDict.Keys
    oItems = oDict.Items
    For i = 0 To oDict.Count - 1
        Wscript.Echo oKeys(i) & " | " & oItems(i)
        sFileSrc = oKeys(i)
        sFileDst = Replace(sFileSrc, strRootPath, strDestPath)
        CreateFolder oFSO.GetFile(sFileSrc).ParentFolder.Path
        oFSO.CopyFile sFileSrc, sFileDst
    Next
End Sub

Sub CreateFolder(sFDR)
    Dim sPath
    sPath = Replace(sFDR, strRootPath, strDestPath)
    If Not oFSO.FolderExists(sPath) Then
        CreateFolder (oFSO.GetFolder(sFDR).ParentFolder.Path)
        oFSO.CreateFolder sPath
    End If
End Sub

Function GetFolder(sFile)
    GetFolder = oFSO.GetFile(sFile).ParentFolder.Path
End Function

Function SortDictionary(oDict, intSort)
    Dim strDict()
    Dim objKey
    Dim strKey, strItem
    Dim X, Y, Z
    Z = oDict.Count
    If Z > 1 Then
        ReDim strDict(Z, 2)
        X = 0
        For Each objKey In oDict
            strDict(X, dictKey) = CStr(objKey)
			'wscript.echo oDict(objKey)
            strDict(X, dictItem) = CLng(oDict(objKey))
            X = X + 1
        Next
        For X = 0 To (Z - 2)
            For Y = X To (Z - 1)
                If strDict(X, intSort) < strDict(Y, intSort) Then
                    strKey = strDict(X, dictKey)
                    strItem = strDict(X, dictItem)
                    strDict(X, dictKey) = strDict(Y, dictKey)
                    strDict(X, dictItem) = strDict(Y, dictItem)
                    strDict(Y, dictKey) = strKey
                    strDict(Y, dictItem) = strItem
                End If
            Next
        Next
        oDict.RemoveAll
        For X = 0 To (Z - 1)
            oDict.Add strDict(X, dictKey), strDict(X, dictItem)
        Next
    End If
End Function

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
My code (given I started with some of your ...) is pretty similar. Haven't tested it against a USB drive, though.

Code:
[blue]Dim fso
Dim strRootSource, strRootDest
Dim rsFiles

' Assumes existence of root destination folder
strRootSource = "c:\temp"
strRootDest = "c:\destination"
CopyTree strRootSource

Sub CopyTree(strSource) ', strDest)
    Set rsFiles = CreateObject("ADODB.Recordset")
    rsFiles.Fields.Append "Source", 200, 255 ' adVarChar
    rsFiles.Fields.Append "Destination", 200, 255 ' adVarChar
    rsFiles.Fields.Append "Size", 3, 4 ' adDouble
    rsFiles.Open
    rsFiles.Sort = "Size DESC"
    
    Set fso = New FileSystemObject

    Recurse strSource
    
    ' Now we have all the sources and destination in order
    ' and correct folder hierarchy in destination
    rsFiles.MoveFirst
    Do Until rsFiles.EOF
        fso.CopyFile rsFiles("Source"), rsFiles("Destination")
        rsFiles.MoveNext
    Loop
End Sub


Function Recurse(strSource)
   
    Dim myitem, subfolder
    For Each myitem In fso.GetFolder(strSource).Files
        rsFiles.AddNew
        rsFiles("Source") = myitem.Path
        rsFiles("Destination") = Replace(myitem.Path, fso.GetFolder(strRootSource), fso.GetFolder(strRootDest)) 'fso.BuildPath(fso.GetFolder(strDest), Replace(myitem.Path, fso.GetFolder(strRootSource), ""))
        rsFiles("Size") = myitem.Size
        ' Build any necessary subfolder in destination as we walk down tree
        subfolder = fso.GetParentFolderName(rsFiles("Destination"))
        If Not fso.FolderExists(subfolder) Then fso.CreateFolder subfolder
    Next
       
    For Each myitem In fso.GetFolder(strSource).SubFolders
        Recurse myitem.Path
    Next

End Function[/blue]
 
Looks good - I've been testing with my above code (2nd style) and reached the same point where it won't copy due to lack of space. If I set it ot be C drive it copies fine. I'll try it with files <4g and probably find that works and that's the problem with both the USBs I've tried - they're FAT32. Windows copies to it fine but apparently not my script :/ I'll test yours too. My latest version complete with some error checks (2nd style, you have elaborated (and probably fixed) the 1st style)
Code:
Dim strRootPath, strDestPath
Const dictKey = 1
Const dictItem = 2
Dim tmp
Dim oFSO, oDict

'------------------- CHANGE PATHS --------------------------
strRootPath = "C:\Data\Images"
strDestPath = "F:" '"C:\Copy" '"E:\"
'-----------------------------------------------------------

Main

Sub Main()
    Set oFSO = CreateObject("Scripting.FileSystemObject")
    Set oDict = CreateObject("Scripting.Dictionary")
    If Right(strRootPath, 1) <> "\" Then strRootPath = strRootPath & "\"
    If Right(strDestPath, 1) <> "\" Then strDestPath = strDestPath & "\"
	If Not oFSO.FolderExists(strRootPath) Then : wscript.echo "Missing Source : " & strRootPath : wscript.quit
	If Not oFSO.FolderExists(strDestPath) Then : wscript.echo "Missing Destination : " & strDestPath : wscript.quit
    ProcessFolder strRootPath
    CopyBiggestFirst
    Set oDict = Nothing
    Set oFSO = Nothing
End Sub

Sub ProcessFolder(sFDR)
    Dim oFDR, oFile
    For Each oFile In oFSO.GetFolder(sFDR).Files
        'Wscript.Echo oFile.Size & vbTab & oFile.Path
		tmp = Int(oFile.Size/1000)
		if tmp = 0 Then tmp = 1
        oDict.Add oFile.Path, tmp ' Key: FilePath, Value: Size
    Next
    For Each oFDR In oFSO.GetFolder(sFDR).SubFolders
        ProcessFolder (oFDR.Path)
    Next
End Sub

Sub CopyBiggestFirst()
    Dim oKeys, oItems, sFileSrc, sFileDst
    'Wscript.Echo vbCrLf & "CopyBiggestFirst()"
    SortDictionary oDict, dictItem
    oKeys = oDict.Keys
    oItems = oDict.Items
    For i = 0 To oDict.Count - 1
        'Wscript.Echo oKeys(i) & " | " & oItems(i)
        sFileSrc = oKeys(i)
        sFileDst = Replace(sFileSrc, strRootPath, strDestPath)
        CreateFolder oFSO.GetFile(sFileSrc).ParentFolder.Path
        oFSO.CopyFile sFileSrc, sFileDst
    Next
End Sub

Sub CreateFolder(sFDR)
    Dim sPath
    sPath = Replace(sFDR, strRootPath, strDestPath)
    If Not oFSO.FolderExists(sPath) Then
        CreateFolder (oFSO.GetFolder(sFDR).ParentFolder.Path)
        oFSO.CreateFolder sPath
    End If
End Sub

Function GetFolder(sFile)
    GetFolder = oFSO.GetFile(sFile).ParentFolder.Path
End Function

Function SortDictionary(oDict, intSort)
    Dim strDict()
    Dim objKey
    Dim strKey, strItem
    Dim X, Y, Z
    Z = oDict.Count
    If Z > 1 Then
        ReDim strDict(Z, 2)
        X = 0
        For Each objKey In oDict
            strDict(X, dictKey) = CStr(objKey)
			'wscript.echo oDict(objKey)
            strDict(X, dictItem) = CLng(oDict(objKey))
            X = X + 1
        Next
        For X = 0 To (Z - 2)
            For Y = X To (Z - 1)
                If strDict(X, intSort) < strDict(Y, intSort) Then
                    strKey = strDict(X, dictKey)
                    strItem = strDict(X, dictItem)
                    strDict(X, dictKey) = strDict(Y, dictKey)
                    strDict(X, dictItem) = strDict(Y, dictItem)
                    strDict(Y, dictKey) = strKey
                    strDict(Y, dictItem) = strItem
                End If
            Next
        Next
        oDict.RemoveAll
        For X = 0 To (Z - 1)
            oDict.Add strDict(X, dictKey), strDict(X, dictItem)
        Next
    End If
End Function

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
Running your code against my C drive, I had to correct the new fso line to be
"Set fso = CreateObject("Scripting.FileSystemObject")
I then ran it and got the error while it went through the source tree it seems, saying
Error Line 39, Char 9, Multiple-step operation generated errors. Check each status value.

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
>"Set fso = CreateObject("Scripting.FileSystemObject")

Mea culpa. I was porting from VB6, and failed to make that necessary change.

> Multiple-step operation generated errors. Check each status value

Ah - I am not scaling the size to fit into a 4 byte signed integer, that's the problem here. So either scale it as you do in your own code, or change


rsFiles.Fields.Append "Size", 3, 4

to

rsFiles.Fields.Append "Size", 20
 
>they're FAT32 ,,, Windows copies to it fine

Hmm. Actually, Windows should have similar problems copying files bigger than 4Gb to a FAT32 volume - 4Gb is the maximum size FAT32 supports ...

You can always reformat the USBs to NTFS ...
 
I confirmed the issue by removing the 4g+ files and then the script ran and copied files fine (my 2nd style one, and I guess your reworked 1st style one will too).

Unfortunately NTFS isn't usable by some things, such as DOS mode ghost, which is what these large images are for. And windows copies the 4.x gig files fine. But vbs just gives the error. I don't know if there's anything I can do about this or if the whole thing is a waste of time (obviously the script works fine but won't do what I wanted it to do - load a FAT32 USB stick up with lots of big images starting with the biggest). Hmmmm :/

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
>And windows copies the 4.x gig files fine

Not sure how it is doing that successfully. As previously stated FAT32 has a maximum filesize of 4Gb. There isn't a trick that Windows can pull to increase this.
 
Well FAT32 formats fine upto 32g and even beyond with the right tools. And copying the 4g files to the USB works fine but the problem was how fragged they got hence wanting to copy largest first so only 1 'gap' was left that all other copies would go into during transit, rather than holes everywhere then fragging what's left.
So now I can't explain it, but so far the script can't do what I can instantly do with copy/paste - and therein lies the entire point of it :( I dunno where I can turn really

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
>Well FAT32 formats fine upto 32g and even beyond with the right tools

Sure. It'll do up to 16Tb volumes. But no individual file can be more than 4Gb in size.
 
OK! I've now sorted the scripts in both flavours, and added some catches and informing messages - couldn't help myself ;)
I also found I could resize my ghost images using ghost explorer to make them <4g so that now they copy to USB - yay! The only downside to all of this is somehow some of the files still fragged but hey - these scripts still work perfectly as intended :) Take your pick!
Code:
Dim strRootPath, strDestPath
Const dictKey = 1
Const dictItem = 2
Dim tmp, totalSize
Dim oFSO, oDict

'------------------- CHANGE PATHS --------------------------
strRootPath = "C:\Data\Images\"
strDestPath = "E:\"
'-----------------------------------------------------------

Main

Sub Main()
    Set oFSO = CreateObject("Scripting.FileSystemObject")
    Set oDict = CreateObject("Scripting.Dictionary")
    If Right(strRootPath, 1) <> "\" Then strRootPath = strRootPath & "\"
    If Right(strDestPath, 1) <> "\" Then strDestPath = strDestPath & "\"
	If Not oFSO.FolderExists(strRootPath) Then : wscript.echo "Missing Source : " & strRootPath : wscript.quit
	If Not oFSO.FolderExists(strDestPath) Then : wscript.echo "Missing Destination : " & strDestPath : wscript.quit
    ProcessFolder strRootPath
	
	If totalSize = 0 Then : wscript.echo "No files to copy!" : wscript.quit
	totalSize = totalSize/1024
	If totalSize < 1 Then totalSize = 1
	wscript.echo FormatNumber(totalSize,2) & " Mb to copy - press OK then wait for 'Finished' message"
	
    CopyBiggestFirst
    Set oDict = Nothing
    Set oFSO = Nothing
	wscript.echo "Finished!"
End Sub

Sub ProcessFolder(sFDR)
    Dim oFDR, oFile
    For Each oFile In oFSO.GetFolder(sFDR).Files
        'Wscript.Echo oFile.Size & vbTab & oFile.Path
		tmp = Int(oFile.Size/1024)
		if tmp < 1 Then tmp = 1
        oDict.Add oFile.Path, tmp ' Key: FilePath, Value: Size
		totalSize = totalSize + tmp
    Next
    For Each oFDR In oFSO.GetFolder(sFDR).SubFolders
        ProcessFolder (oFDR.Path)
    Next
End Sub

Sub CopyBiggestFirst()
    Dim oKeys, oItems, sFileSrc, sFileDst
    'Wscript.Echo vbCrLf & "CopyBiggestFirst()"
    SortDictionary oDict, dictItem
    oKeys = oDict.Keys
    oItems = oDict.Items
    For i = 0 To oDict.Count - 1
        'Wscript.Echo oKeys(i) & " | " & oItems(i)
        sFileSrc = oKeys(i)
        sFileDst = Replace(sFileSrc, strRootPath, strDestPath)
        CreateFolder oFSO.GetFile(sFileSrc).ParentFolder.Path
        oFSO.CopyFile sFileSrc, sFileDst
    Next
End Sub

Sub CreateFolder(sFDR)
    Dim sPath
    sPath = Replace(sFDR, strRootPath, strDestPath)
    If Not oFSO.FolderExists(sPath) Then
        CreateFolder (oFSO.GetFolder(sFDR).ParentFolder.Path)
        oFSO.CreateFolder sPath
    End If
End Sub

Function GetFolder(sFile)
    GetFolder = oFSO.GetFile(sFile).ParentFolder.Path
End Function

Function SortDictionary(oDict, intSort)
    Dim strDict()
    Dim objKey
    Dim strKey, strItem
    Dim X, Y, Z
    Z = oDict.Count
    If Z > 1 Then
        ReDim strDict(Z, 2)
        X = 0
        For Each objKey In oDict
            strDict(X, dictKey) = CStr(objKey)
			'wscript.echo oDict(objKey)
            strDict(X, dictItem) = CLng(oDict(objKey))
            X = X + 1
        Next
        For X = 0 To (Z - 2)
            For Y = X To (Z - 1)
                If strDict(X, intSort) < strDict(Y, intSort) Then
                    strKey = strDict(X, dictKey)
                    strItem = strDict(X, dictItem)
                    strDict(X, dictKey) = strDict(Y, dictKey)
                    strDict(X, dictItem) = strDict(Y, dictItem)
                    strDict(Y, dictKey) = strKey
                    strDict(Y, dictItem) = strItem
                End If
            Next
        Next
        oDict.RemoveAll
        For X = 0 To (Z - 1)
            oDict.Add strDict(X, dictKey), strDict(X, dictItem)
        Next
    End If
End Function
and
Code:
Dim fso
Dim strRootSource, strRootDest
Dim rsFiles
dim totalSize
Set fso = CreateObject("Scripting.FileSystemObject")

'------------------- CHANGE PATHS --------------------------
strRootSource = "c:\data\images\"
strRootDest = "e:\"
'-----------------------------------------------------------

If Right(strRootSource, 1) <> "\" Then strRootSource = strRootSource & "\"
If Right(strRootDest, 1) <> "\" Then strRootDest = strRootDest & "\"
If Not fso.FolderExists(strRootSource) Then : wscript.echo "Missing Source : " & strRootSource : wscript.quit
If Not fso.FolderExists(strRootDest) Then : wscript.echo "Missing Destination : " & strRootDest : wscript.quit

CopyTree strRootSource
wscript.echo "Finished!"

Sub CopyTree(strSource) ', strDest)
    Set rsFiles = CreateObject("ADODB.Recordset")
    rsFiles.Fields.Append "Source", 200, 560 'double 255 byte limit ' 255 ' adVarChar
    rsFiles.Fields.Append "Destination", 200, 560 'double 255 byte limit '255 ' adVarChar
    rsFiles.Fields.Append "Size", 20 ' adBigInt      '3, 4 ' adDouble
    rsFiles.Open
    rsFiles.Sort = "Size DESC"
    
    Recurse strSource
    
	If totalSize = 0 Then : wscript.echo "No files to copy!" : wscript.quit
	totalSize = totalSize/1024000
	If totalSize < 1 Then totalSize = 1
	wscript.echo FormatNumber(totalSize,2) & " Mb to copy - press OK then wait for 'Finished' message"
	
    ' Source hierarchy scanned and duplicated to destination
    rsFiles.MoveFirst
    Do Until rsFiles.EOF
        fso.CopyFile rsFiles("Source"), rsFiles("Destination")
        rsFiles.MoveNext
    Loop
End Sub


Function Recurse(strSource)
   
    Dim myitem, subfolder
    For Each myitem In fso.GetFolder(strSource).Files
        rsFiles.AddNew
        rsFiles("Source") = myitem.Path
        rsFiles("Destination") = Replace(myitem.Path, fso.GetFolder(strRootSource), fso.GetFolder(strRootDest))
        rsFiles("Size") = myitem.Size
		totalSize = totalSize + myitem.Size
        ' Build any necessary subfolder in destination as we walk down tree
        subfolder = fso.GetParentFolderName(rsFiles("Destination"))
        If Not fso.FolderExists(subfolder) Then fso.CreateFolder subfolder
    Next
       
    For Each myitem In fso.GetFolder(strSource).SubFolders
        Recurse myitem.Path
    Next

End Function

_________________________________
Leozack
Code:
MakeUniverse($infinity,1,42);
 
Status
Not open for further replies.

Part and Inventory Search

Sponsor

Back
Top