应该是其中的hidden text like Field's code text个导致了这个问题.NetOffice、Microsoft.Office.Interop.Word或VBA等.
你可以先试试我的代码.尽管到目前为止这还不是一个完美的解决方案,但请注意以下块代码片段:
if (range.Text != searchText)
{
Console.WriteLine(range.Text);
System.Diagnostics.Debugger.Break();
}
至少它为调试指明了方向,知道问题出在哪里.您可以按照此方向进行进一步的改进.
using NetOffice.WordApi.Enums;
using Word = NetOffice.WordApi;
Test();
//The following code applies only to the content( main body) of the document itself and does not include the footnote, comments, header, footer ......, and other parts of the document.
void Test()
{
//just test file for me
//const string fFullnameStr = @"C:\Users\oscar\Dropbox\VS\VBA\stackoverflow.docm";
const string fFullnameStr = @"C:\Users\oscar\Dropbox\VS\stackoverflow\VBA\Naive Bayes classifier.docx";
Word.Application wordApplication = new Word.Application();
wordApplication.DisplayAlerts = WdAlertLevel.wdAlertsNone;
wordApplication.Visible = true; //just for test to watch
Word.Document doc = wordApplication.Documents.Open(fFullnameStr);//Context.WordDocument;
/* for test
if(doc.ProtectionType!= WdProtectionType.wdAllowOnlyFormFields)
Console.WriteLine(doc.ProtectionType);
doc.Close();
doc.Protect(WdProtectionType.wdAllowOnlyFormFields);
just for test */
int i = 0;
//var searchText = "smth text";
// https://github.com/Aldman/ProtectedRangeSearch/blob/main/FindTextTests.cs#L15
var searchText = "based on a common";//"diameter features";//"based on a common";//"assume that the value";
var bookmarkName = "newBookmark";
Word.Range rng = doc.Content;//doc.Content.Duplicate;
if (doc.ProtectionType != WdProtectionType.wdAllowOnlyFormFields)
{
if (doc.ActiveWindow.View.ShowFieldCodes)
doc.ActiveWindow.View.ShowFieldCodes = false;
while (rng.Find.Execute(findText: searchText, matchCase: true, matchWholeWord: true, matchWildcards: false,
matchSoundsLike: false, matchAllWordForms: false, forward: true, wrap: WdFindWrap.wdFindStop))
{
rng.Bookmarks.Add(bookmarkName + i++.ToString()); //rng.Select();//just for test
}
}
else
{
foreach (var paragraph in rng.Paragraphs)//http://msdn.microsoft.com/en-us/en-us/Iibrary/office/ff837006.aspx 轉址為:https://learn.microsoft.com/en-us/office/vba/api/Word.Range.Paragraphs
{
Word.Range range = paragraph.Range;
var text = range.Text;
var index = text.IndexOf(searchText); int indexPre = index;
var start = 0;
#region GetParagraphTextWithHiddenSymbols
foreach (Word.Field item in range.Fields)
{
index = text.IndexOf(item.Result.Text, start);
if (index >= 0)
{
text = text.Substring(0, index) + "{" + item.Code.Text + "}" + item.Result.Text + ((char)21).ToString()
+ text.Substring(index + item.Result.Text.Length);
start = (text.Substring(0, index) + "{" + item.Code.Text + "}" + item.Result.Text + ((char)21).ToString()).Length;
}
//text = text.Replace(item.Result.Text,
//"{" +item.Code.Text+"}"+ item.Result.Text + (char)21);
//fieldsResultLength += item.Result.Text.Length + 2 + 1;//2="{}" of field code,1=chr(21) placehold of the fields
}
start = 0;
//there will be "" both the start and end of a ContentControl object, so have to plus 2 for the two placeholders
foreach (Word.ContentControl item in range.ContentControls)
{
text = text.Substring(start, item.Range.Start - 1) + " " + item.Range.Text + " " + text.Substring(item.Range.End - 1);
}
#endregion
while (index >= 0)
{
index = text.IndexOf(searchText);
start = range.Start;
var end = range.End;
start += index; //+ fieldsResultLength;
end = start + searchText.Length;
range.SetRange(start, end);
while (range.Text != searchText && end <= range.End)
{
range.SetRange(++start, ++end);
if (range.Text == searchText) break;
}
if (range.Text != searchText)
{
Console.WriteLine(range.Text);
System.Diagnostics.Debugger.Break();
}
range.Bookmarks.Add(bookmarkName + i++.ToString());
text = paragraph.Range.Text; start = 0;
index = text.IndexOf(searchText, indexPre + 1);
indexPre = index;
}
}
}
wordApplication.Visible = true; //just for test to watch
doc.ActiveWindow.View.ReadingLayout = false;//just for test to watch
if (doc.ProtectionType != WdProtectionType.wdNoProtection)
doc.Unprotect(123.ToString());//just for test
}
当保护类型类似于wdAllowOnlyFormFields
时,Find个对象不能执行搜索是逻辑必然的.我认为这是因为Find对象类不仅是一个Find类,而且还包括一个替换(编辑)功能.您需要取消它的保护,或者更改保护它的方式,或者 Select 使用当前的替代方案,我在上面的代码中对这两种方案的流进行了条件调整.除了使用这种foreach paragraph
方法进行定位外,您还可以考虑使用正则表达式来实现这一点.无论使用哪种方法,都必须对隐藏的文本进行适当的处理,如Fields‘代码文本,才能获得准确的结果.
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="NetOfficeFw.Core" Version="1.9.3" />
<PackageReference Include="NetOfficeFw.Word" Version="1.9.3" />
</ItemGroup>
<ItemGroup>
<FrameworkReference Include="Microsoft.WindowsDesktop.App.WindowsForms" />
</ItemGroup>
</Project>
void Test_ShowFieldCodes()
{
//just test file for me
const string fFullnameStr = @"C:\Users\oscar\Dropbox\VS\VBA\stackoverflow.docm";
Word.Application wordApplication = new Word.Application();
wordApplication.DisplayAlerts = WdAlertLevel.wdAlertsNone;
//wordApplication.Visible = true; //just for test to watch
Word.Document doc = wordApplication.Documents.Open(fFullnameStr);//Context.WordDocument;
int i = 0;
var searchText = "smth text";
var bookmarkName = "newBookmark";
Word.Range rng = doc.Content;//doc.Content.Duplicate;
if (doc.ProtectionType != WdProtectionType.wdAllowOnlyFormFields)
{
while (rng.Find.Execute(findText: searchText, matchCase: true, matchWholeWord: true, matchWildcards: false,
matchSoundsLike: false, matchAllWordForms: false, forward: true, wrap: WdFindWrap.wdFindStop))
{
if ((bool)rng.Information(WdInformation.wdInContentControl))
rng.SetRange(rng.Paragraphs[1].Range.ContentControls[1].Range.End + 1,
rng.Paragraphs[1].Range.ContentControls[1].Range.End + 1);
rng.Bookmarks.Add(bookmarkName + i++.ToString());
}
}
else
{ //rng = doc.Content.Duplicate;
foreach (var paragraph in rng.Paragraphs)//http://msdn.microsoft.com/en-us/en-us/Iibrary/office/ff837006.aspx 轉址為:https://learn.microsoft.com/en-us/office/vba/api/Word.Range.Paragraphs
{
Word.Range range = paragraph.Range;
var text = range.Text;
var index = text.IndexOf(searchText); int indexPre = 0;
var start = 0;
while (index >= 0)
{
if (paragraph.Range.Fields.Count > 0)
{
doc.ActiveWindow.View.ShowFieldCodes = true;
text = paragraph.Range.Text;
//if there are fields this will be the index of ShowFieldCodes=false + index of ShowFieldCodes=true and plus 1
index = index + text.IndexOf(searchText, indexPre) + 1;
doc.ActiveWindow.View.ShowFieldCodes = false;
}
start = range.Start;
var end = range.End;
start += index;
end = start + searchText.Length;
range.SetRange(start, end);
while (range.Text != searchText && end <= range.End && range.End < doc.Content.End - 1)
{
//range.Select();//just for test
range.SetRange(++start, ++end);
if (range.Text == searchText) break;
}
if (range.Text != searchText && range.End < doc.Content.End - 1)
{
Console.WriteLine(range.Text);
System.Diagnostics.Debugger.Break();
}
if (range.Text == searchText)
{
if ((bool)range.Information(WdInformation.wdInContentControl))
range.SetRange(range.Paragraphs[1].Range.ContentControls[1].Range.End + 1,
range.Paragraphs[1].Range.ContentControls[1].Range.End + 1);
range.Bookmarks.Add(bookmarkName + i++.ToString());
}
text = paragraph.Range.Text; start = 0;
index = text.IndexOf(searchText, indexPre + 1);
indexPre = index;
}
}
}
wordApplication.Visible = true; //just for test to watch
//doc.Unprotect(1.ToString());//just for test
}
20230712内容控件
所以答案是,在your file中没有字段,它所有的文件都是ContentControl而不是Fields!ActiveDocument.ContentControls.Count
是3.ActiveDocument.Fields.Count
是0.
新代码在上面进行了更新.