How to extract documents from multiple document libraries into folders. Each library has a Managed Metadata column with sub-terms.
I have created three functions:
- Download Document
- Create Folder
- Main Method
In addition to the above, the script logs each document URL and its destination folder path in a .log file.
Create Folder
# Creates the directory $folderPath when nothing exists at that path yet.
# Parameters:
#   $folderPath - path of the directory to ensure.
# Output: whatever New-Item emits when a directory is created; nothing otherwise.
Function Create-Folder
{
    param ($folderPath)
    # -LiteralPath: the sub folder names are built from term labels, and with -Path
    # characters such as '[' or ']' are read as wildcard patterns, so an existing
    # folder would be reported as missing and New-Item would fail on it.
    if (!(Test-Path -LiteralPath $folderPath))
    {
        New-Item -Path $folderPath -ItemType Directory
    }
}
Download Document
This snippet was borrowed from Stack Overflow 😁
# Saves the file behind a list item into a local folder and records it in the log.
# Parameters:
#   $web        - web object whose GetFile() method resolves the item's URL.
#   $folderPath - local folder that receives the file (callers create it beforehand).
#   $docItem    - list item; its Url and ParentList.Title properties are read.
# Reads $logFilePath from the calling scope and appends one line per file:
#   library title|document URL|file name|destination path
Function Download-Document
{
    param($web, $folderPath, $docItem)
    #File download snippet referred from: https://stackoverflow.com/questions/43350575/how-to-use-powershell-to-download-files-from-sharepoint
    $File = $web.GetFile($docItem.Url)
    $Binary = $File.OpenBinary()
    $detinationPath = $folderPath + "\" + $File.Name;
    # WriteAllBytes creates or overwrites the target and releases the file handle
    # itself, also when the write fails; the earlier FileStream/BinaryWriter pair
    # was only closed after a successful write.
    [System.IO.File]::WriteAllBytes($detinationPath, $Binary)
    Add-Content -Path $logFilePath -Value "$($docItem.ParentList.Title)|$($docItem.Url)|$($File.Name)|$($detinationPath)";
}
The folder structure is shown below (I have created two top-level folders to hold the term-related documents):
# Names of the two top-level folders created under the data folder.
$RootFolder   = 'Vehicles'
$SecondFolder = 'Brands'
# Title of the document library whose files are extracted into $SecondFolder.
$SecondFolderLibraryName = 'Volvo_Library'
# Name of the Managed Metadata column that decides each document's sub folder.
$ChildFolderColumnName = 'VehicleTypeColum'
Main Method
# Downloads every document of the library titled $SecondFolderLibraryName into a
# folder tree below $dataFolderPath + $RootFolder + "\" + $SecondFolder.
#
# Reads from the calling scope: $dataFolderPath, $RootFolder, $SecondFolder,
# $SecondFolderLibraryName, $ChildFolderColumnName and $siteURL.
#
# The Label of each item's $ChildFolderColumnName value selects the target folder:
#   - empty value        -> the library folder itself
#   - label without ':'  -> one sub folder named after the label
#   - label with ':'     -> one nested sub folder per ':'-separated part
# Failures are reported on the host instead of being silently discarded.
Function Extract-DocLibrary
{
    # Local and initially empty, so the Finally block never disposes a value
    # inherited from an outer scope and never calls a method on $null.
    $spWeb = $null
    Try
    {
        #Check for Root Folder
        $rootFolderpath = $dataFolderPath + $RootFolder;
        Create-Folder -folderPath $rootFolderpath;

        #Check and create Second Folder (Folder for Library)
        $secondFolderPath = $dataFolderPath + $RootFolder + "\" + $SecondFolder;
        Create-Folder -folderPath $secondFolderPath;

        # -ErrorAction Stop routes a failed site lookup into the Catch block below.
        $spWeb = Get-SPWeb -Identity $siteURL -ErrorAction Stop;

        # TryGetList returns $null for an unknown title (the Lists[...] indexer
        # raises an error instead), which makes the check below meaningful.
        $DocLibrary = $spWeb.Lists.TryGetList($SecondFolderLibraryName)
        if ($DocLibrary)
        {
            foreach ($document in $DocLibrary.Items)
            {
                $metadataField = $document[$ChildFolderColumnName]
                if (![string]::IsNullOrEmpty($metadataField))
                {
                    Write-Host $metadataField.Label;
                    $FirstLevleLable = $metadataField.Label;
                    if ($FirstLevleLable.Contains(':'))
                    {
                        # Sub terms are separated by ':'; build one nested folder per part.
                        # The first level is printed in green, deeper levels in cyan.
                        $thirdLevelFolder = $secondFolderPath;
                        $levelColor = 'Green';
                        foreach ($term in ($FirstLevleLable -split ':'))
                        {
                            $thirdLevelFolder = $thirdLevelFolder + "\" + $term;
                            Write-Host " "$thirdLevelFolder -f $levelColor;
                            Create-Folder -folderPath $thirdLevelFolder;
                            $levelColor = 'Cyan';
                        }
                    }
                    else
                    {
                        $thirdLevelFolder = $secondFolderPath + "\" + $FirstLevleLable;
                        Write-Host $thirdLevelFolder -f Yellow
                        Create-Folder -folderPath $thirdLevelFolder;
                    }
                    Download-Document -web $spWeb -folderPath $thirdLevelFolder -docItem $document;
                }
                else
                {
                    #No Metadata Tag Updated for this Document
                    Write-Host $secondFolderPath;
                    Download-Document -web $spWeb -folderPath $secondFolderPath -docItem $document;
                }
            }
        }
        else
        {
            Write-Host "Library '$SecondFolderLibraryName' was not found in $siteURL" -f Red
        }
    }
    Catch
    {
        # Report the failure; the function still returns normally, as before.
        Write-Host "Extraction failed: $($_.Exception.Message)" -f Red
    }
    Finally
    {
        if ($null -ne $spWeb)
        {
            $spWeb.Dispose()
        }
    }
}
# Entry point: run the extraction with the settings defined above.
Extract-DocLibrary;
The entire script is in the section below.
# Load the SharePoint snap-in only when this session does not have it yet;
# adding it a second time raises an error.
if (-not (Get-PSSnapin -Name "Microsoft.SharePoint.PowerShell" -ErrorAction SilentlyContinue))
{
    Add-PSSnapin "Microsoft.SharePoint.PowerShell"
}

# One timestamp per run, formatted twice: a readable title and a compact ID
# that prefixes the log file name.
$timeStamp = Get-Date;
$ExecutionIDTitle =$timeStamp.ToString("yyyy-MM-dd-HH-mm");
$ExecutionID =$timeStamp.ToString("yyyyMMddHHmm");

# Placeholders: replace with the real site URL and local folders before running.
$siteURL ="SiteURL";
$dataFolderPath = "FolderPath\Data\";
$logFilePath = "FolderPath\Log\$ExecutionID-Extraction.log";

# Add-Content creates the log file but not its folder, so make sure the folder exists.
$logFolderPath = Split-Path -Path $logFilePath -Parent;
if (-not (Test-Path -LiteralPath $logFolderPath))
{
    New-Item -Path $logFolderPath -ItemType Directory | Out-Null
}
# Header row of the pipe-delimited log.
Add-Content -Path $logFilePath -Value "SPLibraryName|SPDocURL|FileName|FolderPath"

# Names of the two top-level folders created under the data folder.
$RootFolder ="Vehicles"
$SecondFolder="Brands"
$SecondFolderLibraryName="Volvo_Library" #Library Name (Title)
$ChildFolderColumnName="VehicleTypeColum" #Metadata Column Name
# Saves the file behind a list item into a local folder and records it in the log.
# Parameters:
#   $web        - web object whose GetFile() method resolves the item's URL.
#   $folderPath - local folder that receives the file (callers create it beforehand).
#   $docItem    - list item; its Url and ParentList.Title properties are read.
# Reads $logFilePath from the calling scope and appends one line per file:
#   library title|document URL|file name|destination path
Function Download-Document
{
    param($web, $folderPath, $docItem)
    #File download snippet referred from: https://stackoverflow.com/questions/43350575/how-to-use-powershell-to-download-files-from-sharepoint
    $File = $web.GetFile($docItem.Url)
    $Binary = $File.OpenBinary()
    $detinationPath = $folderPath + "\" + $File.Name;
    # WriteAllBytes creates or overwrites the target and releases the file handle
    # itself, also when the write fails; the earlier FileStream/BinaryWriter pair
    # was only closed after a successful write.
    [System.IO.File]::WriteAllBytes($detinationPath, $Binary)
    Add-Content -Path $logFilePath -Value "$($docItem.ParentList.Title)|$($docItem.Url)|$($File.Name)|$($detinationPath)";
}
# Creates the directory $folderPath when nothing exists at that path yet.
# Parameters:
#   $folderPath - path of the directory to ensure.
# Output: whatever New-Item emits when a directory is created; nothing otherwise.
Function Create-Folder
{
    param ($folderPath)
    # -LiteralPath: the sub folder names are built from term labels, and with -Path
    # characters such as '[' or ']' are read as wildcard patterns, so an existing
    # folder would be reported as missing and New-Item would fail on it.
    if (!(Test-Path -LiteralPath $folderPath))
    {
        New-Item -Path $folderPath -ItemType Directory
    }
}
# Downloads every document of the library titled $SecondFolderLibraryName into a
# folder tree below $dataFolderPath + $RootFolder + "\" + $SecondFolder.
#
# Reads from the calling scope: $dataFolderPath, $RootFolder, $SecondFolder,
# $SecondFolderLibraryName, $ChildFolderColumnName and $siteURL.
#
# The Label of each item's $ChildFolderColumnName value selects the target folder:
#   - empty value        -> the library folder itself
#   - label without ':'  -> one sub folder named after the label
#   - label with ':'     -> one nested sub folder per ':'-separated part
# Failures are reported on the host instead of being silently discarded.
Function Extract-DocLibrary
{
    # Local and initially empty, so the Finally block never disposes a value
    # inherited from an outer scope and never calls a method on $null.
    $spWeb = $null
    Try
    {
        #Check for Root Folder
        $rootFolderpath = $dataFolderPath + $RootFolder;
        Create-Folder -folderPath $rootFolderpath;

        #Check and create Second Folder (Folder for Library)
        $secondFolderPath = $dataFolderPath + $RootFolder + "\" + $SecondFolder;
        Create-Folder -folderPath $secondFolderPath;

        # -ErrorAction Stop routes a failed site lookup into the Catch block below.
        $spWeb = Get-SPWeb -Identity $siteURL -ErrorAction Stop;

        # TryGetList returns $null for an unknown title (the Lists[...] indexer
        # raises an error instead), which makes the check below meaningful.
        $DocLibrary = $spWeb.Lists.TryGetList($SecondFolderLibraryName)
        if ($DocLibrary)
        {
            foreach ($document in $DocLibrary.Items)
            {
                $metadataField = $document[$ChildFolderColumnName]
                if (![string]::IsNullOrEmpty($metadataField))
                {
                    Write-Host $metadataField.Label;
                    $FirstLevleLable = $metadataField.Label;
                    if ($FirstLevleLable.Contains(':'))
                    {
                        # Sub terms are separated by ':'; build one nested folder per part.
                        # The first level is printed in green, deeper levels in cyan.
                        $thirdLevelFolder = $secondFolderPath;
                        $levelColor = 'Green';
                        foreach ($term in ($FirstLevleLable -split ':'))
                        {
                            $thirdLevelFolder = $thirdLevelFolder + "\" + $term;
                            Write-Host " "$thirdLevelFolder -f $levelColor;
                            Create-Folder -folderPath $thirdLevelFolder;
                            $levelColor = 'Cyan';
                        }
                    }
                    else
                    {
                        $thirdLevelFolder = $secondFolderPath + "\" + $FirstLevleLable;
                        Write-Host $thirdLevelFolder -f Yellow
                        Create-Folder -folderPath $thirdLevelFolder;
                    }
                    Download-Document -web $spWeb -folderPath $thirdLevelFolder -docItem $document;
                }
                else
                {
                    #No Metadata Tag Updated for this Document
                    Write-Host $secondFolderPath;
                    Download-Document -web $spWeb -folderPath $secondFolderPath -docItem $document;
                }
            }
        }
        else
        {
            Write-Host "Library '$SecondFolderLibraryName' was not found in $siteURL" -f Red
        }
    }
    Catch
    {
        # Report the failure; the function still returns normally, as before.
        Write-Host "Extraction failed: $($_.Exception.Message)" -f Red
    }
    Finally
    {
        if ($null -ne $spWeb)
        {
            $spWeb.Dispose()
        }
    }
}
# Entry point: run the extraction with the settings defined above.
Extract-DocLibrary;
Watch Video for more details