Для больших строк рекомендуется использовать итератор — ради скорости и малого расхода памяти.
import re, itertools
# itertools.izip_longest (Python 2) was renamed to zip_longest in Python 3;
# pick whichever one this interpreter provides so the snippet runs on both.
try:
    zip_longest = itertools.izip_longest
except AttributeError:
    zip_longest = itertools.zip_longest


def group_words(words, n):
    """Lazily combine consecutive items of *words* into tuples of length *n*.

    Args:
        words: Any iterable of items (an iterator or a concrete sequence).
        n: Number of items per group.

    Returns:
        An iterator of n-tuples. If the input runs out in the middle of a
        group, the remaining slots of that last tuple are filled with None.
    """
    # The same iterator object is repeated n times, so every slot of a tuple
    # pulls the *next* item from one shared stream. Calling iter() once here
    # keeps that true even when a list (not an iterator) is passed in.
    source = iter(words)
    return zip_longest(*(source,) * n)


# Original text
text = "When in the course of human Events, it becomes necessary for one People to dissolve the Political Bands which have connected them with another, and to assume among the Powers of the Earth, the separate and equal Station to which the Laws of Nature and of Nature?s God entitle them, a decent Respect to the Opinions of Mankind requires that they should declare the causes which impel them to the Separation."
n = 10
# An iterator which will extract words one by one from text when needed
words = (m.group() for m in re.finditer(r'\w+', text))
# The final iterator that combines words into n-length groups
word_groups = group_words(words, n)
for g in word_groups:
    # print(g) with a single tuple argument prints the same text on
    # Python 2 (print statement) and Python 3 (print function).
    print(g)
Вы получите следующий результат:
('When', 'in', 'the', 'course', 'of', 'human', 'Events', 'it', 'becomes', 'necessary')
('for', 'one', 'People', 'to', 'dissolve', 'the', 'Political', 'Bands', 'which', 'have')
('connected', 'them', 'with', 'another', 'and', 'to', 'assume', 'among', 'the', 'Powers')
('of', 'the', 'Earth', 'the', 'separate', 'and', 'equal', 'Station', 'to', 'which')
('the', 'Laws', 'of', 'Nature', 'and', 'of', 'Nature', 's', 'God', 'entitle')
('them', 'a', 'decent', 'Respect', 'to', 'the', 'Opinions', 'of', 'Mankind', 'requires')
('that', 'they', 'should', 'declare', 'the', 'causes', 'which', 'impel', 'them', 'to')
('the', 'Separation', None, None, None, None, None, None, None, None)
Похоже, это вполне в духе Python (pythonic). – physicsmichael
Ох. Большинству приложений, работающих с n-граммами, нужны перекрывающиеся группы: ['When in the course', 'in the course of', 'the course of human'] и т. д. –