HOW TO GENERATE A CUSTOM SITEMAP CRAWLER THROUGH POWERSHELL SCRIPT

We had a requirement to list all Sitecore URLs that return a 404 or any other status code than 200. To meet it, we created a PowerShell script that runs at a regular interval through the Sitecore scheduler and crawls every URL listed in the sitemap.

Every URL that returned a 404 page was collected, and the bundled list of URLs was sent to the respective admin.

The script loops through the sitemap items and, for each one, retrieves the status of its URL by calling Get-StausCode. If the URL does not return 200 (for example, it returns 404), the script adds it to the list that stores all failing URLs.

Overall, the script produces the list of URLs that do not return a 200 status code.


###### Start Global Variables ######

# Base address used to build the sitemap URL and to rewrite each sitemap entry.
$siteURL = "https://xyz.com"

# Value the configured environment must equal before the crawl is allowed to run.
$EnvironmentURL = "xyz.com"

# Environment identifier read from the application's appSettings; compared with $EnvironmentURL.
$Server_Env = [System.Configuration.ConfigurationManager]::AppSettings["Setting Key"]

# Sender address, also read from appSettings.
$FromEmail = [System.Configuration.ConfigurationManager]::AppSettings["Setting Key"]

# Recipients: several e-mail addresses may be listed, separated by commas.
$ToEmail = 'abc.con, xzy.com'

# Subject line and body text of the report e-mail.
$EmailSubject = 'Email Subject'

$EmailBody = 'Email body'

# Collects one entry per URL whose status code is not 200.
$ListOfUrls = @()

###### End Global Variables ######


# Requests $ItemUrl and reports the HTTP status code the server answered with.
#
# Parameters:
#   $ItemUrl - absolute URL to request.
# Returns:
#   An object with two properties:
#     URL        - the URL that was requested
#     StatusCode - the numeric HTTP status (200, 404, 500, ...), or 0 when no
#                  HTTP response was obtained at all (DNS failure, timeout, ...).
function Get-StausCode($ItemUrl) {

  # Initialised here so the finally block never sees a $resp inherited from a parent scope.
  $resp = $null

  # 0 means "no HTTP status could be obtained".
  $HTTP_Status = 0

  try {

    $req = [System.Net.WebRequest]::Create($ItemUrl)

    $resp = $req.GetResponse()

    $HTTP_Status = [int]$resp.StatusCode

  }

  catch {

    # GetResponse() throws a WebException for 4xx/5xx answers. The response that
    # carries the real status code is attached to that exception, possibly
    # wrapped in a PowerShell method-invocation exception, so unwrap it first.
    $webEx = $_.Exception

    while ($webEx -and $webEx -isnot [System.Net.WebException]) {

      $webEx = $webEx.InnerException

    }

    if ($webEx -and $webEx.Response) {

      $resp = $webEx.Response

      $HTTP_Status = [int]$resp.StatusCode

    }

    else {

      # No response at all (or an invalid URL): keep status 0 and log it.
      Write-Host "Something went wrong during the get status code."

    }

  }

  finally {

    if ($resp) {

      $resp.Close()

      $resp.Dispose()

    }

  }

  return [PSCustomObject]@{
    URL        = $ItemUrl
    StatusCode = $HTTP_Status
  }

}


# Builds an HTML report of the failing URLs and wraps it in a mail attachment.
#
# Parameters:
#   $listOfUrls - collection of objects exposing URL and StatusCode properties;
#                 may be empty or $null.
# Returns:
#   A System.Net.Mail.Attachment (media type text/html) named
#   "URL Status Report-<timestamp>.html" whose content is the rendered report.
# Notes:
#   $exportProperty, $head and $formattedBody are not assigned in this function
#   before their first use; any value visible from an enclosing scope is honoured.
function Create-Report($listOfUrls) {

  $title = "URL Status Report"

  # dd = zero-padded day, HH = 24-hour clock, so names are unambiguous and sort correctly.
  $datetime = Get-Date -Format "yyyy-MM-dd_HHmmss"

  # Use the caller-supplied column definition when there is one; otherwise
  # fall back to the two properties every entry carries.
  if ($exportProperty) {

    $format = & ([scriptblock]::Create($exportProperty))

  }

  else {

    $format = 'URL', 'StatusCode'

  }

  $head = $head + '<style>table,h2{font-family:Arial,Helvetica,sans-serif;border-collapse:collapse}table tr:nth-child(2n){background-color:#f2f2f2}table tr:hover{background-color:#ddd}table th{text-align:left;background-color:#4a692f;color:#fff;padding:10px ;}table td{padding:6px;}</style>';

  # Drop null entries: they carry no URL and would only render as empty rows.
  $rows = @($listOfUrls | Where-Object { $null -ne $_ })

  if ($rows.Count -gt 0) {

    # 500 is "Internal Server Error" (400 is "Bad Request").
    $formattedBody = '<h2>' + $title + '</h2>' + '<p>* Status code 404 -- URL Not Found</p><p>* Status code 500 -- Internal Server Error</p>' + $formattedBody

  }

  else {

    $formattedBody = '<h2>' + $title + '</h2>' + '<p>There is no url that is giving 404/500 status</p>' + $formattedBody

  }

  $reportResult = $rows |
    Select-Object -Property $format |
    ConvertTo-Html -Head $head -Body $formattedBody -Title $title |
    ForEach-Object { [System.Web.HttpUtility]::HtmlDecode($_) } |
    Out-String

  # The attachment reads from this stream when the mail is sent, so it must
  # stay open here; rewind it after writing.
  $attachmentBytes = [System.Text.Encoding]::UTF8.GetBytes($reportResult)

  $memoryStream = New-Object System.IO.MemoryStream

  $memoryStream.Write($attachmentBytes, 0, $attachmentBytes.Length)

  $null = $memoryStream.Seek(0, [System.IO.SeekOrigin]::Begin)

  $contentType = New-Object "System.Net.Mime.ContentType"

  $contentType.MediaType = [System.Net.Mime.MediaTypeNames+Text]::Html

  $contentType.Name = "$title-$datetime.html"

  $attachment = New-Object System.Net.Mail.Attachment($memoryStream, $contentType)

  return $attachment

}


# E-mails the report attachment over SMTP (port 587, SSL enabled).
#
# Parameters:
#   $attachment - System.Net.Mail.Attachment to send. It is disposed together
#                 with the message once sending has finished, so it must not
#                 be reused afterwards.
# Reads from the enclosing scope:
#   $FromEmail, $ToEmail, $EmailSubject, $EmailBody, and $smtpServer,
#   $smtpUsername, $smtpPassword.
#   NOTE(review): the three $smtp* variables are not assigned anywhere in the
#   visible script - confirm they are provided by the host environment.
function Send-Report($attachment) {

  $msg = New-Object Net.Mail.MailMessage

  $SMTPClient = $null

  try {

    $msg.From = $FromEmail

    # Add() accepts several addresses separated by commas.
    $msg.To.Add($ToEmail)

    $msg.Subject = $EmailSubject

    $msg.Body = $EmailBody

    $msg.Attachments.Add($attachment)

    $smtpPort = 587

    $SMTPClient = New-Object Net.Mail.SmtpClient($smtpServer, $smtpPort)

    $SMTPClient.EnableSsl = $true

    $SMTPClient.Credentials = New-Object System.Net.NetworkCredential($smtpUsername, $smtpPassword)

    $SMTPClient.Send($msg)

  }

  finally {

    # Release the SMTP connection, then the message; disposing the message
    # also disposes its attachments and their underlying streams.
    if ($SMTPClient) {

      $SMTPClient.Dispose()

    }

    $msg.Dispose()

  }

}


# Entry point: runs only when the configured environment matches $EnvironmentURL.
# Downloads the sitemap, checks every listed URL, gathers those that do not
# answer 200 and e-mails them as an HTML report.
if ($Server_Env -eq $EnvironmentURL) {

  $SitemapURL = $siteURL + '/sitemap.xml'

  # One quoted string: in command mode a bare "+" would be printed literally.
  Write-Host "Fetching file from $SitemapURL"

  $sitemap = Invoke-RestMethod $SitemapURL -UseBasicParsing

  if ($null -ne $sitemap.urlset) {

    # Collect the loop output in one go instead of growing the array with +=
    # on every iteration.
    $ListOfUrls += @(
      foreach ($service in $sitemap.urlset.url) {

        # Point the sitemap entry at the site being checked.
        $url = $service.loc.replace("https://www.zxc.com", $siteURL)

        $statusCodeRes = Get-StausCode($url)

        # If no result came back, keep the URL in the report with status 0
        # ("no status obtained") rather than adding a null entry.
        if ($null -eq $statusCodeRes) {

          $statusCodeRes = [PSCustomObject]@{ URL = $url; StatusCode = 0 }

        }

        Write-Host "$($statusCodeRes.URL) ---- $($statusCodeRes.StatusCode)"

        if ($statusCodeRes.StatusCode -ne 200) {

          $statusCodeRes

        }

      }
    )

  }

  $reportAttachment = Create-Report $ListOfUrls

  Send-Report $reportAttachment

  Write-Host "Report has been sent"

}

Comments

Popular posts from this blog

Content Hub Integration with XM Cloud from Sitecore DAM directly from Sitecore DAM

Mastering Sitecore Search API Crawlers: A Comprehensive Series (Part 1-3)

Removing Edge Content and Purging Edge Cache with Sitecore XM Cloud Admin API