I'm trying to figure out why Apple's Natural Language API returns unexpected results.
我做错了什么?这是语法问题吗?
我有以下四个字符串,我想提取每个单词的"stem form"
// text 1 has two "accredited" in a different order
let text1: String = "accredit accredited accrediting accredited accreditation accredits"
// text 2 has three "accredited" in different order
let text2: String = "accredit accredits accredited accrediting accredited accredited accreditation"
// text 3 has "accreditation"
let text3: String = "accreditation"
// text 4 has "accredited"
let text4: String = "accredited"
问题在于单词accreditation和accredited.
"accreditation"这个词再也没有出现在词干上.和accredited根据单词在字符串中的顺序返回不同的结果,如附图中的文本1和文本2所示.
我已经用了Apple's documentation的密码
以下是SwiftUI的完整代码:
import SwiftUI
import NaturalLanguage
struct ContentView: View {
// text 1 has two "accredited" in a different order
let text1: String = "accredit accredited accrediting accredited accreditation accredits"
// text 2 has three "accredited" in a different order
let text2: String = "accredit accredits accredited accrediting accredited accredited accreditation"
// text 3 has "accreditation"
let text3: String = "accreditation"
// text 4 has "accredited"
let text4: String = "accredited"
var body: some View {
ScrollView {
VStack {
Text("Text 1").bold()
tagText(text: text1, scheme: .lemma).padding(.bottom)
Text("Text 2").bold()
tagText(text: text2, scheme: .lemma).padding(.bottom)
Text("Text 3").bold()
tagText(text: text3, scheme: .lemma).padding(.bottom)
Text("Text 4").bold()
tagText(text: text4, scheme: .lemma).padding(.bottom)
}
}
}
// MARK: - tagText
func tagText(text: String, scheme: NLTagScheme) -> some View {
VStack {
ForEach(partsOfSpeechTagger(for: text, scheme: scheme)) { word in
Text(word.description)
}
}
}
// MARK: - partsOfSpeechTagger
func partsOfSpeechTagger(for text: String, scheme: NLTagScheme) -> [NLPTagResult] {
var listOfTaggedWords: [NLPTagResult] = []
let tagger = NLTagger(tagSchemes: [scheme])
tagger.string = text
let range = text.startIndex..<text.endIndex
let options: NLTagger.Options = [.omitPunctuation, .omitWhitespace]
tagger.enumerateTags(in: range, unit: .word, scheme: scheme, options: options) { tag, tokenRange in
if let tag = tag {
let word: String = String(text[tokenRange])
let result = NLPTagResult(word: word, tag: tag)
//if !word.localizedCaseInsensitiveContains(tag.rawValue) {
listOfTaggedWords.append(result)
//}
}
return true
}
return listOfTaggedWords
}
// MARK: - NLPTagResult
struct NLPTagResult: Identifiable, Equatable, Hashable, Comparable {
var id = UUID()
var word: String
var tag: NLTag?
var description: String {
var newString: String = "\(word)"
if let tag = tag {
newString += " : \(tag.rawValue)"
}
return newString
}
// MARK: - Equatable & Hashable requirements
static func == (lhs: Self, rhs: Self) -> Bool {
lhs.id == rhs.id
}
func hash(into hasher: inout Hasher) {
hasher.combine(id)
}
// MARK: - Comparable requirements
static func <(lhs: NLPTagResult, rhs: NLPTagResult) -> Bool {
lhs.id.uuidString < rhs.id.uuidString
}
}
}
// MARK: - Previews
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
谢谢你的帮助!