用"????"作为分隔符对字符串"????????????????"做普通的字符串拆分,通常会得到["????"; "????????"],也就是把一个视觉字符拆散了.
我想要一个拆分函数,它返回["????????????????"],而不会把视觉字符拆开.据我所知,这些"视觉字符"在.NET世界中被称为文本元素(TextElement),在其他地方被称为EGC(Extended Grapheme Cluster,扩展字素簇).
这里有一些细节:https://docs.microsoft.com/en-us/dotnet/api/system.globalization.textelementenumerator?view=net-6.0
我提出了一个糟糕的解决方案:
/// Splits a list of text elements on every occurrence of a separator that is
/// itself a list of text elements, and returns the pieces as strings.
///
/// elements: the input, already broken into text elements.
/// sep:      the separator, already broken into text elements.
///
/// Returns the pieces between separators, each concatenated back into a
/// string. Leading, trailing and adjacent separators produce empty strings.
/// An empty `elements` list yields [""].
///
/// An empty `sep` can never advance through the input, so instead of looping
/// forever the input is returned unsplit as a single piece.
let ecgStringSplit'
    (elements : string list)
    (sep : string list)
    : string list =
    // Pieces are accumulated in reverse (cons is O(1)); restore order on flush.
    let flush (pendingRev : string list) : string =
        pendingRev |> List.rev |> String.concat ""

    // If `rest` begins with `prefix`, returns what follows the prefix.
    let rec tryStripPrefix (prefix : string list) (rest : string list) =
        match prefix, rest with
        | [], _ -> Some rest
        | p :: ps, r :: rs when p = r -> tryStripPrefix ps rs
        | _ -> None

    // Tail-recursive scan: `pendingRev` is the piece being built,
    // `piecesRev` the finished pieces, both in reverse order.
    let rec loop remaining pendingRev piecesRev =
        match remaining with
        | [] -> List.rev (flush pendingRev :: piecesRev)
        | head :: tail ->
            match tryStripPrefix sep remaining with
            | Some afterSeparator ->
                loop afterSeparator [] (flush pendingRev :: piecesRev)
            | None ->
                loop tail (head :: pendingRev) piecesRev

    if List.isEmpty sep then
        [ String.concat "" elements ]
    else
        loop elements [] []
/// Breaks a string into its text elements, in order, using the enumerator
/// returned by System.Globalization.StringInfo.GetTextElementEnumerator.
/// An empty string yields an empty list.
let toEgcList (s : string) : string list =
    let enumerator = System.Globalization.StringInfo.GetTextElementEnumerator s
    // Build the list directly; each MoveNext advances to the next text element.
    [ while enumerator.MoveNext() do
        yield enumerator.GetTextElement() ]
/// Splits `str` on `sep`, where both are first converted to lists of text
/// elements so that the split operates on whole text elements.
let ecgStringSplit (str: string) (sep: string) =
    let strElements = toEgcList str
    let sepElements = toEgcList sep
    ecgStringSplit' strElements sepElements
// Usage examples; the trailing comments give the output reported by the author.
printfn "%A" (ecgStringSplit "Hello world 1" " ") // results in ["Hello"; "world"; "1"]
printfn "%A" (ecgStringSplit "????????????????" "????") // results in ["????????????????"]
除了把它重构为不使用可变值之类的改进之外,我想知道是否有我没想到的更简单的办法来解决这个问题?
我也很乐意接受C#的解决方案,我可以自己把它移植到F#.
编辑:使用F#列表以外的其他东西肯定会有助于提高性能.