import spacy

# Load spaCy's small English pipeline (the model package must be installed
# separately, e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load('en_core_web_sm')

# Run the pipeline on a sample sentence and print every token together with
# its coarse-grained part-of-speech tag.
doc = nlp("Hello, how are you?")
for token in doc:
    print(token.text, token.pos_)
import spacy
# Switch to spaCy's small Chinese pipeline. This rebinds the module-level
# `nlp`, so everything below (including extract_dialogue_with_spacy_chinese)
# uses the Chinese model rather than the English one loaded earlier.
nlp = spacy.load("zh_core_web_sm")
def extract_dialogue_with_spacy_chinese(text: str) -> list:
    """Group the sentences of *text* by the most recently named speaker.

    The text is run through the module-level ``nlp`` pipeline. Sentences are
    visited in order; whenever a sentence contains a ``PERSON`` entity, the
    first such entity becomes the current speaker and a new dialogue entry is
    started. Every sentence seen while a speaker is set is attached to that
    speaker.

    Args:
        text: Raw script text to analyse.

    Returns:
        A list of ``(speaker, dialogue)`` tuples in order of appearance, where
        ``dialogue`` is the speaker's stripped sentences joined by single
        spaces. Sentences that occur before any ``PERSON`` entity has been
        seen are dropped. The list is empty when no ``PERSON`` entity is
        found.

    Note:
        The sentence that names the speaker is itself appended, so the
        recorded dialogue contains the speaker's name as it appears in that
        sentence. Any ``PERSON`` entity switches the speaker, including a
        name that is merely mentioned inside someone else's line.
    """
    doc = nlp(text)
    dialogues = []
    current_speaker = None
    current_dialogue = []

    for sent in doc.sents:
        # Only the first PERSON entity in a sentence is considered.
        speaker_ent = next(
            (ent for ent in sent.ents if ent.label_ == "PERSON"), None
        )
        if speaker_ent is not None:
            # A new speaker starts here: flush what the previous one said.
            if current_speaker:
                dialogues.append((current_speaker, " ".join(current_dialogue)))
            current_speaker = speaker_ent.text
            current_dialogue = []

        # Text before the first recognised speaker has no owner and is skipped.
        if current_speaker:
            current_dialogue.append(sent.text.strip())

    # Flush the last speaker's lines, which no later speaker change triggers.
    if current_dialogue:
        dialogues.append((current_speaker, " ".join(current_dialogue)))
    return dialogues
# Sample script: one "speaker: line" entry per row.
script = """
张三: 你好吗?
李四: 我很好,谢谢!
张三: 太好了!
"""

# Extract (speaker, dialogue) pairs from the sample and print each of them.
dialogues = extract_dialogue_with_spacy_chinese(script)
for character, line in dialogues:
    print(f"人物: {character}, 对话: {line}")