Package pypln :: Package workers :: Module pos
[hide private]

Source Code for Module pypln.workers.pos

 1  # coding: utf-8 
 2   
 3  from nltk import pos_tag 
 4   
 5   
 6  __meta__ = {'from': 'document', 
 7              'requires': ['text', 'tokens'], 
 8              'to': 'document', 
 9              'provides': ['pos'],} 
10  #TODO: add 'lang' to 'requires' 
11   
12 -def _put_offset(text, tagged_text):
13 result = [] 14 position = 0 15 for token, classification in tagged_text: 16 token_position = text.find(token, position) 17 result.append((token, classification, token_position)) 18 position = token_position + len(token) - 1 19 return result
20
def main(document):
    """Part-of-speech tag a document's tokens and record their offsets.

    Reads ``document['text']`` and ``document['tokens']``, tags the tokens
    with ``pos_tag`` and returns ``{'pos': ...}`` where the value is the
    result of ``_put_offset`` applied to the text and the tagged tokens.
    """
    # Look up both inputs up front ('text' first, then 'tokens') before
    # running the tagger.
    source_text, token_list = document['text'], document['tokens']
    return {'pos': _put_offset(source_text, pos_tag(token_list))}
26