Skip to content

Commit aba10ea

Browse files
authored
Merge pull request #894 from reshmee011/debugcrawllog
New sample script to debug search issues using crawl log
2 parents b54bdbb + dd34f0c commit aba10ea

File tree

4 files changed

+186
-0
lines changed

4 files changed

+186
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
2+
3+
# Debugging SharePoint Search by inspecting crawl log
4+
5+
This script helps identify files that appear in the crawl logs but are still not searchable. The underlying cause can vary, and in many cases you may need to raise a support case with Microsoft for deeper investigation.
6+
7+
In our scenario, the issue was caused by sensitivity labels already applied to files that had been moved from another location. This created a conflict with the sensitivity label inherited from the parent library, which prevented the files from being indexed correctly.
8+
9+
A key indicator was that the `SPItemModifiedTime` field was blank in the crawl log, a symptom that often correlates with search indexing problems. To confirm the issue, the script also runs a search query for each such file and reports only the files that are indeed not discoverable.
10+
11+
## Summary
12+
13+
# [PnP PowerShell](#tab/pnpps)
14+
15+
```powershell
16+
# Purpose:
#   For every site listed in Sites.csv, read the crawl log of each visible
#   document library, keep the entries whose SPItemModifiedTime is blank, run a
#   search query for each of those URLs and export the ones that search does
#   not return to a timestamped CSV under .\output_files.
#
# Input : Sites.csv next to this script, with a 'SiteUrl' column.
# Output: output_files\CrawlLog-SPItemModifiedTime-Null-<timestamp>.csv

Clear-Host

# ===== Settings =====
$clientId = "xxxxxxxx"   # value passed to Connect-PnPOnline -ClientId
$dateTime = Get-Date -Format "yyyy-MM-dd-HH-mm-ss"

# Folder that contains this script; input is read from it and output is written beneath it.
$directoryPath = $PSScriptRoot
$csvPath = Join-Path $directoryPath "Sites.csv" # CSV must have a column 'SiteUrl'

# Fail fast: without the input file the script would otherwise export an empty report.
if (-not (Test-Path -LiteralPath $csvPath)) {
    throw "Input file not found: $csvPath (expected a CSV with a 'SiteUrl' column)"
}

# Ensure output folder exists
$outputFolder = Join-Path $directoryPath "output_files"
if (-not (Test-Path $outputFolder)) { New-Item -ItemType Directory -Path $outputFolder | Out-Null }
$outputCsv = Join-Path $outputFolder ("CrawlLog-SPItemModifiedTime-Null-" + $dateTime + ".csv")

# System/ignored lists (matched against the list Title)
$ExcludedLists = @(
    "Access Requests","App Packages","appdata","appfiles","Apps in Testing","Cache Profiles","Composed Looks",
    "Content and Structure Reports","Content type publishing error log","Converted Forms","Device Channels",
    "Form Templates","fpdatasources","Get started with Apps for Office and SharePoint","List Template Gallery",
    "Long Running Operation Status","Maintenance Log Library","Images","site collection images","Master Docs",
    "Master Page Gallery","MicroFeed","NintexFormXml","Quick Deploy Items","Relationships List","Reusable Content",
    "Reporting Metadata","Reporting Templates","Search Config List","Site Assets","Preservation Hold Library",
    "Site Pages","Solution Gallery","Style Library","Suggested Content Browser Locations","Theme Gallery",
    "TaxonomyHiddenList","User Information List","Web Part Gallery","wfpub","wfsvc","Workflow History",
    "Workflow Tasks","Pages"
)

# ===== Collect results =====
$results = New-Object System.Collections.Generic.List[object]
$sites = Import-Csv -Path $csvPath # expects column "SiteUrl"

foreach ($s in $sites) {
    $siteUrl = $s.SiteUrl

    # Ignore blank rows in the CSV.
    if ([string]::IsNullOrWhiteSpace($siteUrl)) { continue }

    Write-Host "Connecting to site: $siteUrl" -ForegroundColor Cyan

    # Connect interactively with the client ID (adjust auth as needed for your tenant).
    # If the connection fails, skip the site: continuing would query whatever
    # connection is still active while building URLs from this site's address.
    try {
        Connect-PnPOnline -ClientId $clientId -Url $siteUrl -Interactive -ErrorAction Stop
    }
    catch {
        Write-Error "Could not connect to ${siteUrl}: $($_.Exception.Message)"
        continue
    }

    # Get only visible document libraries that are not in the exclusion list
    $lists = Get-PnPList -Includes BaseType, BaseTemplate, Hidden, Title, ItemCount, RootFolder |
        Where-Object {
            $_.Hidden -eq $false -and
            $_.BaseType -eq "DocumentLibrary" -and
            $_.Title -notin $ExcludedLists
        }

    foreach ($library in $lists) {
        # An empty library has no files to check, and asking the crawl log for 0 rows is pointless.
        if ($library.ItemCount -lt 1) { continue }

        # Build library URL: e.g. https://tenant/sites/site/Shared Documents
        $libraryUrl = ($siteUrl.TrimEnd('/')) + '/' + $library.RootFolder.Name
        Write-Host "Querying library: $($library.Title)" -ForegroundColor Yellow

        # Request as many crawl log rows as the library has items
        $rowLimit = $library.ItemCount

        # Pull crawl log entries and keep only candidate files:
        #   - SPItemModifiedTime is null or empty
        #   - not the library root itself
        #   - not a view/page (*.aspx) or a OneNote item (*.one*)
        $entries = Get-PnPSearchCrawlLog -Filter $libraryUrl -RowLimit $rowLimit -RawFormat |
            Where-Object {
                [string]::IsNullOrWhiteSpace([string]$_.SPItemModifiedTime) -and
                $_.FullUrl -ne $libraryUrl -and
                $_.FullUrl -notlike "*/Forms/Default.aspx" -and
                $_.FullUrl -notlike "*.aspx*" -and
                $_.FullUrl -notlike "*.one*"
            }

        foreach ($result in $entries) {
            try {
                # Search for the exact path of the crawled item
                $kql = "Path:`"$($result.FullUrl)`""

                # -ErrorAction Stop: a failed query must reach the catch block; otherwise
                # $searchr would be $null and the file would be reported as missing.
                $searchr = Submit-PnPSearchQuery -Query $kql -All -SelectProperties @(
                    "Title", "Path"
                ) -SortList @{ LastModifiedTime = "Descending" } -ErrorAction Stop

                # No hit: the file is in the crawl log but not searchable, so record it
                if ($searchr.RowCount -lt 1) {
                    $projected = [pscustomobject]@{
                        FullUrl            = $result.FullUrl
                        DocumentUrl        = $libraryUrl
                        SPItemModifiedTime = $result.SPItemModifiedTime
                        ErrorCode          = $result.ErrorCode
                    }
                    $results.Add($projected)
                }
            }
            catch {
                Write-Error "$($_.Exception.Message) for $($result.FullUrl)"
            }
        }
    }
    # Disconnect-PnPOnline   # optional: uncomment to drop the connection after each site
}

# ===== Export =====
$results | Export-Csv -Path $outputCsv -NoTypeInformation -Encoding UTF8
Write-Host "Export complete: $outputCsv" -ForegroundColor Green
111+
112+
```
113+
[!INCLUDE [More about PnP PowerShell](../../docfx/includes/MORE-PNPPS.md)]
114+
115+
116+
## Source Credit
117+
118+
Sample idea first appeared on [Debugging SharePoint Search with PnP PowerShell and Crawl Logs](https://reshmeeauckloo.com/posts/powershell-sharepoint-debugging-crawllog/).
119+
120+
## Contributors
121+
122+
| Author(s) |
123+
|-----------|
124+
| [Reshmee Auckloo](https://github.com/reshmee011) |
125+
126+
127+
[!INCLUDE [DISCLAIMER](../../docfx/includes/DISCLAIMER.md)]
128+
<img src="https://m365-visitor-stats.azurewebsites.net/script-samples/scripts/spo-crawllog-search-debugging" aria-hidden="true" />
28.4 KB
Loading
58.7 KB
Loading
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
[
2+
{
3+
"name": "spo-crawllog-search-debugging",
4+
"source": "pnp",
5+
"title": "Debugging SharePoint Search by inspecting crawl log",
6+
"shortDescription": "Debugging SharePoint Search by inspecting crawl log",
7+
"url": "https://pnp.github.io/script-samples/spo-crawllog-search-debugging/README.html",
8+
"longDescription": [
9+
"Debugging SharePoint Search by inspecting the crawl log for files that are not searchable despite appearing in the crawl log without errors"
10+
],
11+
"creationDateTime": "2025-12-14",
12+
"updateDateTime": "2025-12-14",
13+
"products": [
14+
"SharePoint",
15+
"Search",
16+
"Crawl Log"
17+
],
18+
"metadata": [
19+
{
20+
"key": "PNP-POWERSHELL",
21+
"value": "3.1.0"
22+
}
23+
],
24+
"categories": [
25+
"Search",
26+
"Crawl Log"
27+
],
28+
"tags": [
29+
"Connect-PnPOnline",
30+
"Get-PnPList",
31+
"Get-PnPSearchCrawlLog",
32+
"Submit-PnPSearchQuery"
33+
],
34+
"thumbnails": [
35+
{
36+
"type": "image",
37+
"order": 100,
38+
"url": "https://raw.githubusercontent.com/pnp/script-samples/main/scripts/spo-crawllog-search-debugging/assets/example.png",
39+
"alt": "Preview of the issue"
40+
}
41+
],
42+
"authors": [
43+
{
44+
"gitHubAccount": "reshmee011",
45+
"company": "",
46+
"pictureUrl": "https://github.com/reshmee011.png",
47+
"name": "Reshmee Auckloo"
48+
}
49+
],
50+
"references": [
51+
{
52+
"name": "Want to learn more about PnP PowerShell and the cmdlets",
53+
"description": "Check out the PnP PowerShell site to get started and for the reference to the cmdlets.",
54+
"url": "https://aka.ms/pnp/powershell"
55+
}
56+
]
57+
}
58+
]

0 commit comments

Comments
 (0)