Source code for pyknp.knp.features

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
from __future__ import absolute_import
from pyknp.knp.rel import Rel
import unittest
import six


class Features(dict):
    """Dictionary of the features parsed from a feature string.

    The tags contained in the feature string are parsed and stored as
    dictionary entries, e.g.::

        "<正規化代表表記:遅れる/おくれる>" --> {"正規化代表表記": "遅れる/おくれる"}

    Tags of the form ``key:value`` are stored as ``{key: value}``; tags
    without a ``:`` are stored with the dummy value ``True``.  Tags whose
    text starts with ``"rel "`` are not stored in the dictionary; they are
    turned into ``Rel`` objects and collected in ``rels``.

    Attributes:
        spec: The feature string with trailing whitespace removed.
        rels: ``None``, or the list of ``Rel`` objects whose ``ignore``
            attribute is ``False``.
    """

    def __init__(self, spec, splitter="><", ignore_first_character=True):
        """Parse ``spec`` and populate the dictionary.

        Args:
            spec: Feature string (a text string, ``six.text_type``).
            splitter: Separator between two consecutive tags.
            ignore_first_character: When true, parsing starts at index 1
                so that the first character of ``spec`` (the opening
                ``<`` in the default format) is skipped.

        Raises:
            AssertionError: If ``spec`` is not a text string.
            ValueError: If ``splitter`` is empty while there is something
                to parse (the scan could never advance).

        Note:
            The final character of the stripped spec is never part of a
            key or value: the last tag is sliced with end index ``-1``
            (the closing ``>`` in the default format), independently of
            ``ignore_first_character``.
        """
        super(Features, self).__init__()
        assert isinstance(spec, six.text_type)
        self.spec = spec.rstrip()
        self.rels = None
        self._tag = None
        # Test the stripped text: a whitespace-only spec has nothing to
        # parse and must not produce a bogus {"": True} entry.
        if not self.spec:
            return
        if not splitter:
            # str.find("") always matches at the start offset, so the
            # loop below would never terminate.
            raise ValueError("splitter must be a non-empty string")

        tag_start = 1 if ignore_first_character else 0
        tag_end = None
        while tag_end != -1:
            # find() returns -1 once no further splitter exists.  That -1
            # is then used as the slice end below, which cuts off the
            # final character of the spec for the last tag.
            tag_end = self.spec.find(splitter, tag_start)
            kv_splitter = self.spec.find(':', tag_start, tag_end)
            # Offset form of startswith avoids copying the tail of the
            # string on every iteration.
            if self.spec.startswith('rel ', tag_start):
                rel = Rel(self.spec[tag_start:tag_end])
                if rel.ignore is False:
                    if self.rels is None:
                        self.rels = []
                    self.rels.append(rel)
            elif kv_splitter == -1:
                # Tag without a value: store a dummy True.
                self[self.spec[tag_start:tag_end]] = True
            else:
                key = self.spec[tag_start:kv_splitter]
                self[key] = self.spec[kv_splitter + 1:tag_end]
            tag_start = tag_end + len(splitter)

    @property
    def pas(self):
        """Return the ``pas`` attribute of the object held in ``_tag``.

        ``_tag`` is initialised to ``None`` by ``__init__`` and is not
        assigned anywhere else in this class, so it is presumably set by
        external code; while it is ``None`` this access raises
        ``AttributeError``.
        """
        return self._tag.pas
class FeaturesTest(unittest.TestCase):
    """Unit tests for the parsing done by Features."""

    def test(self):
        """Key/value tags and value-less tags become dictionary entries."""
        tag_str1 = (
            "<BGH:構文/こうぶん><文節内><係:文節内><文頭><体言>"
            "<名詞項候補><先行詞候補><正規化代表表記:構文/こうぶん>"
        )
        f1 = Features(tag_str1)
        self.assertEqual(f1.get("BGH"), "構文/こうぶん")
        self.assertEqual(f1.get("係"), "文節内")
        # Tags without a value are stored with the dummy value True.
        self.assertIs(f1.get("先行詞候補"), True)
        self.assertIsNone(f1.get("dummy"))
        self.assertEqual(f1.get("正規化代表表記"), "構文/こうぶん")

    def testRels(self):
        """Tags starting with 'rel ' are collected as Rel objects."""
        tag_str = (
            '<rel type="時間" target="一九九五年" sid="950101003-002" id="1"/>'
            '<rel type="ヲ" target="衆院" sid="950101003-002" id="3"/>'
            '<rel type="ガ" target="不特定:人1"/>'
            '<rel type="時間" target="国会前" sid="950101003-asd" id="16"/>'
        )
        f = Features(tag_str)
        self.assertEqual(len(f.rels), 4)
        first = f.rels[0]
        self.assertEqual(first.tid, 1)
        self.assertEqual(first.mode, "")
        self.assertEqual(first.atype, "時間")
        self.assertEqual(first.sid, "950101003-002")
        self.assertEqual(first.target, "一九九五年")


if __name__ == '__main__':
    unittest.main()