Я пытаюсь запустить скрипт python, который использует NLTK-токенизацию внутри. Вот часть кода из сценария, который инициализирует NLTKnltk 'unknown url' error
class NLTKTagger:
'''
class that supplies part of speech tags using NLTK
note: avoids the NLTK downloader (see __init__ method)
'''
def __init__(self):
import nltk
from nltk.tag import PerceptronTagger
from nltk.tokenize import TreebankWordTokenizer
tokenizer_fn = os.path.abspath(resource_filename('phrasemachine.data', 'punkt.english.pickle'))
tagger_fn = os.path.abspath(resource_filename('phrasemachine.data', 'averaged_perceptron_tagger.pickle'))
# Load the tagger
self.tagger = PerceptronTagger(load=False)
self.tagger.load(tagger_fn)
# note: nltk.word_tokenize calls the TreebankWordTokenizer, but uses the downloader.
# Calling the TreebankWordTokenizer like this allows skipping the downloader.
# It seems the TreebankWordTokenizer uses PTB tokenization = regexes. i.e. no downloads
# https://github.com/nltk/nltk/blob/develop/nltk/tokenize/treebank.py#L25
self.tokenize = TreebankWordTokenizer().tokenize
self.sent_detector = nltk.data.load(tokenizer_fn)
Я получаю следующее сообщение об ошибке
Traceback (most recent call last):
File "C:\Users\Uzair\Desktop\phrasemachine_test.py", line 3, in <module>
phrasemachine.get_phrases(text)
File "C:\Program Files\Python36-32\lib\site-packages\phrasemachine\phrasemachine.py", line 260, in get_phrases
tagger = TAGGER_NAMES[tagger]()
File "C:\Program Files\Python36-32\lib\site-packages\phrasemachine\phrasemachine.py", line 173, in get_stdeng_nltk_tagger
tagger = NLTKTagger()
File "C:\Program Files\Python36-32\lib\site-packages\phrasemachine\phrasemachine.py", line 140, in __init__
self.tagger.load(tagger_fn)
File "C:\Program Files\Python36-32\lib\site-packages\nltk\tag\perceptron.py", line 209, in load
self.model.weights, self.tagdict, self.classes = load(loc)
File "C:\Program Files\Python36-32\lib\site-packages\nltk\data.py", line 801, in load
opened_resource = _open(resource_url)
File "C:\Program Files\Python36-32\lib\site-packages\nltk\data.py", line 924, in _open
return urlopen(resource_url)
File "C:\Program Files\Python36-32\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\Python36-32\lib\urllib\request.py", line 526, in open
response = self._open(req, data)
File "C:\Program Files\Python36-32\lib\urllib\request.py", line 549, in _open
'unknown_open', req)
File "C:\Program Files\Python36-32\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Program Files\Python36-32\lib\urllib\request.py", line 1388, in unknown_open
raise URLError('unknown url type: %s' % type)
urllib.error.URLError: <urlopen error unknown url type: c>
Я использую Python 3.6 на Windows 7 и NLTK 3.2.1. Я пробовал решения, упомянутые в here и here Но никто не работал. Любое другое решение?
Переустановить с инструкциями от https://gist.github.com/alvations/0ed8641d7d2e1941b9f9 – alvas