|
3 | 3 | ================= |
4 | 4 | """ |
5 | 5 | import pickle |
| 6 | +import os |
6 | 7 |
|
7 | 8 | from .Sense import Sense |
8 | 9 | from .Sememe import Sememe |
@@ -37,8 +38,8 @@ def __init__(self, init_sim=False, init_babel=False): |
37 | 38 | init_babel (`bool`) : whether to initialize the BabelNet synset search module. |
38 | 39 | ''' |
39 | 40 | try: |
40 | | - sememe_dir, sememe_triples_dir, data_dir = [ |
41 | | - 'resources/sememe_all', 'resources/sememe_triples_taxonomy.txt', 'resources/HowNet_dict_complete'] |
| 41 | + sememe_dir, sememe_triples_dir, data_dir = [os.path.join("resources", i) for i in [ |
| 42 | + 'sememe_all', 'sememe_triples_taxonomy.txt', 'HowNet_dict_complete']] |
42 | 43 |
|
43 | 44 | # Initialize sememe list from sememe_all. |
44 | 45 | self.sememe_dic = dict() |
@@ -489,7 +490,8 @@ def initialize_similarity_calculation(self): |
489 | 490 | Implementation is contributed by Jun Yan, which is based on the paper : |
490 | 491 | "Jiangming Liu, Jinan Xu, Yujie Zhang. An Approach of Hybrid Hierarchical Structure for Word Similarity Computing by HowNet. In Proceedings of IJCNLP" |
491 | 492 | """ |
492 | | - sememe_sim_table_pickle_path, sense_tree_path, sense_syn_path = ['resources/sememe_sim_table', 'resources/sense_tree', 'resources/synonym'] |
| 493 | + sememe_sim_table_pickle_path, sense_tree_path, sense_syn_path = [os.path.join("resources", i) for i in [ |
| 494 | + 'sememe_sim_table', 'sense_tree', 'synonym']] |
493 | 495 |
|
494 | 496 | try: |
495 | 497 | self.sememe_sim_table = pickle.load( |
@@ -706,7 +708,7 @@ def get_nearest_words(self, word, language=None, score=False, pos=None, merge=Fa |
706 | 708 | def initialize_babelnet_dict(self): |
707 | 709 | """Initialize the BabelNet Synset dict. |
708 | 710 | """ |
709 | | - babel_data_path = 'resources/babel_data' |
| 711 | + babel_data_path = os.path.join('resources','babel_data') |
710 | 712 | try: |
711 | 713 | babel_synset_list = pickle.load( |
712 | 714 | get_resource(babel_data_path, "rb")) |
@@ -822,7 +824,68 @@ def get_all_synset_pos(self): |
822 | 824 | def get_all_synset_relations(self): |
823 | 825 | """Return all the relations between synsets in BabelNet. |
824 | 826 | """ |
825 | | - return ['similar', 'derivation', 'similar_to', 'derivationally_related_form', 'gloss_related_form_(monosemous)', 'gloss_related_form_(disambiguated)', 'also_see', 'pertainym', 'pertainym_(pertains_to_nouns)', 'category_domain', 'domain_of_synset_-_topic', 'antonym', 'attribute', 'region_domain', 'usage_domain', 'domain_of_synset_-_usage', 'domain_of_synset_-_region', 'verb_group', 'hypernym', 'hyponym', 'entailment', 'cause', 'this_taxon_is_source_of', 'natural_product_of_taxon', 'color', 'subclass_of', 'instance_of', 'semantically_related_form', 'part_meronym', 'uses', 'practiced_by', 'member_meronym', 'member_holonym', 'taxon_rank', 'recommended_unit_of_measurement', 'health_specialty', 'different_from', 'country_of_origin', 'iucn_conservation_status', 'instances_hyponym', 'instance_hyponym', 'political_ideology', 'located_in_the_administrative_territorial_entity', 'part_holonym', 'antiparticle', 'interaction', 'part_of', 'opposite_of', 'facet_of', 'use', 'has_part', 'programming_language', 'said_to_be_the_same_as', 'named_after', 'followed_by', 'substance_meronym', 'streak_color', 'instance_hypernym', 'twinned_administrative_body', 'continent', 'country', 'location_of_creation', 'material_used', 'month_of_the_year', 'applies_to_jurisdiction', 'work_location', 'member_of_political_party', 'country_of_citizenship', 'occupation', 'parent_taxon', 'taxonomic_type', 'member_of_category_domain', 'member_of_this_domain_-_topic', 'cause_of_death', 'field_of_work', 'place_of_death', 'place_of_birth', 'prime_factor', 'model_item', 'studied_by', 'has_parts_of_the_class', 'has_quality', 'used_by', 'has_cause', 'religion', 'killed_by', 'child', 'substance_holonym', 'basin_country', 'connects_with', 'from_narrative_universe', 'located_on_terrain_feature', 'location_of_discovery', 'diplomatic_relation', 'official_language', 'language_used', 'capital', 'member_of', 'contains_administrative_territorial_entity', 'shares_border_with', 'has_effect', 'medical_condition', 
'ethnic_group', 'sport', 'indigenous_to', 'writing_system', 'noble_title', 'partially_coincident_with', 'immediate_cause_of', 'has_immediate_cause', 'lowest_point', 'follows', 'subject_has_role', 'grammatical_option_indicates', 'parent_astronomical_body', 'separated_from', 'place_of_burial', 'genre', 'participant', 'highest_point', 'located_in_or_next_to_body_of_water', 'location', 'found_in_taxon', 'conflict', 'child_astronomical_body', 'interested_in', 'writing_language', 'instrument', 'languages_spoken_written_or_signed', 'employer', 'educated_at', 'capital_of', 'possible_treatment', 'afflicts', 'symptoms', 'father', 'this_zoological_name_is_coordinate_with', 'taxon_synonym', 'measured_physical_quantity', 'has_grammatical_case', 'official_symbol', 'head_of_state', 'replaces', 'has_fruit_type', 'has_facet_polytope', 'studies', 'worshipped_by', 'based_on', 'depicts', 'residence', 'mouth_of_the_watercourse', 'author', 'main_subject', 'owner_of', 'discoverer_or_inventor', 'basic_form_of_government', 'anatomical_location', 'drug_used_for_treatment', 'hair_color', 'spouse', 'sibling', 'medical_condition_treated', 'manifestation_of', 'position_held', 'product_or_material_produced', 'territory_claimed_by', 'field_of_this_occupation', 'language_of_work_or_name', 'currency', 'office_held_by_head_of_government', 'permanent_duplicated_item', 'fabrication_method', 'risk_factor', 'headquarters_location', 'public_holiday', 'unmarried_partner', 'significant_event', 'manner_of_death', 'native_language', 'military_rank', 'award_received', 'source_of_energy', 'physically_interacts_with', 'inflows', 'lake_outflow', 'iconographic_symbol', 'military_branch', 'official_color', 'founded_by', 'calculated_from', 'has_natural_reservoir', 'replaced_by', 'movement', 'has_contributing_factor', 'spore_print_color', 'sex_or_gender', 'member_of_region_domain', 'member_of_this_domain_-_region', 'endemic_to', 'arterial_supply', 'day_in_year_for_periodic_occurrence', 'produced_by', 'owned_by', 
'godparent', 'allegiance', 'main_food_source', 'develops_from', 'family', 'patron_saint', 'does_not_have_part', 'copyright_representative', 'location_of_formation', 'typically_sells', 'notable_work', 'tributary', 'characters', 'canonization_status', 'student_of', 'academic_degree', 'time_period', 'mountain_range', 'influenced_by', 'sexual_orientation', 'sexually_homologous_with', 'habitat', 'item_operated', 'medical_examinations', 'office_held_by_head_of_the_organization', 'original_combination', 'office_held_by_head_of_state', 'culture', 'feast_day', 'route_of_administration', 'type_locality_(geology)', 'located_in_time_zone', 'given_name', 'operator', 'produced_sound', 'designed_to_carry', 'side_effect', 'next_lower_rank', 'currency_symbol_description', 'central_bank/issuer', 'enclave_within', 'less_than', 'industry', 'exclave_of', 'significant_drug_interaction', 'foods_traditionally_associated', 'day_of_week', 'is_a_list_of', 'appointed_by', 'objective_of_project_or_action', 'conjugate_acid', 'conferred_by', 'cell_component', 'domain_of_saint_or_deity', 'place_of_detention', 'direction_relative_to_location', 'official_religion', 'legislative_body', 'shape', 'performer', 'target', 'organizer', 'occupant', 'has_grammatical_mood', 'has_tense', 'described_by_source', 'penalty', 'invasive_to', 'subsidiary', 'creator', 'discography', 'voice_type', 'uses_capitalization_for', 'linguistic_typology', 'official_residence', 'territory_overlaps', 'contributing_factor_of', 'encodes', 'professorship', 'encoded_by', 'biological_process', 'narrative_location', 'archives_at', 'measures', 'doctoral_student', 'dual_to', 'flower_color', 'convicted_of', 'first_aid_measures', 'place_of_origin_(switzerland)', 'destination_point', 'constellation', 'history_of_topic', 'parent_organization', 'lifestyle', 'legal_form', 'ancestral_home', 'terminus', 'original_language_of_film_or_tv_show', 'historic_county', 'member_of_usage_domain', 'member_of_this_domain_-_usage', 'honorific_prefix', 
'next_higher_rank', 'lakes_on_river', 'origin_of_the_watercourse', 'participant_in', 'located_in_present-day_administrative_territorial_entity', 'pathogen_transmission_process', 'geography_of_topic', 'depicted_by', 'student', 'conjugate_base', 'host', 'solid_solution_series_with', 'contains', 'organization_directed_by_the_office_or_person', 'distribution_format', 'relative', 'mother', 'represents', 'sports_discipline_competed_in', 'victory', 'part_of_the_series', 'is_pollinated_by', 'authority', 'molecular_function', 'commemorates', 'basionym', 'temporal_range_start', 'powered_by', 'anthem', 'prerequisite', 'central_bank', 'of', 'developer', 'sponsor', 'has_works_in_the_collection', 'social_classification', 'contributor_to_the_creative_work_or_subject', 'voice_actor', 'chairperson', 'commissioned_by', 'manufacturer', 'anatomical_branch_of', 'foundational_text', 'appears_in_the_form_of', 'contains_settlement', 'dialect_of', 'base', 'has_vertex_figure', 'season_starts', 'members_have_occupation', 'has_anatomical_branch', 'does_not_have_quality', 'commander_of_(deprecated)', "topic's_main_category", 'measurement_scale', 'present_in_work', 'significant_person', 'gui_toolkit_or_framework', 'operating_area', 'wears', 'designated_as_terrorist_by', 'platform', 'has_boundary', 'derivative_work', 'greater_than', 'place_of_publication', 'offers_view_on', 'architectural_style', 'parent_of_this_hybrid_breed_or_cultivar', 'doctoral_advisor', 'by-product', 'signatory', 'production_statistics', 'codomain', 'domain', 'honorific_suffix', 'had_as_last_meal', 'birthday', 'historical_region', 'cause_of_destruction', 'people_or_cargo_transported', 'place_served_by_transport_hub', 'family_name', 'programming_paradigm', 'enemy_of', 'has_list', 'language_regulatory_body', 'incarnation_of', 'muscle_insertion', 'open_days', 'payment_types_accepted', 'member_of_military_unit', 'crosses', 'determination_method', 'publisher', 'by-product_of', 'statement_describes'] |
| 827 | + return ['similar', 'derivation', 'similar_to', 'derivationally_related_form', 'gloss_related_form_(monosemous)', |
| 828 | + 'gloss_related_form_(disambiguated)', 'also_see', 'pertainym', 'pertainym_(pertains_to_nouns)', 'category_domain', |
| 829 | + 'domain_of_synset_-_topic', 'antonym', 'attribute', 'region_domain', 'usage_domain', 'domain_of_synset_-_usage', |
| 830 | + 'domain_of_synset_-_region', 'verb_group', 'hypernym', 'hyponym', 'entailment', 'cause', 'this_taxon_is_source_of', |
| 831 | + 'natural_product_of_taxon', 'color', 'subclass_of', 'instance_of', 'semantically_related_form', 'part_meronym', |
| 832 | + 'uses', 'practiced_by', 'member_meronym', 'member_holonym', 'taxon_rank', 'recommended_unit_of_measurement', |
| 833 | + 'health_specialty', 'different_from', 'country_of_origin', 'iucn_conservation_status', 'instances_hyponym', |
| 834 | + 'instance_hyponym', 'political_ideology', 'located_in_the_administrative_territorial_entity', 'part_holonym', |
| 835 | + 'antiparticle', 'interaction', 'part_of', 'opposite_of', 'facet_of', 'use', 'has_part', 'programming_language', |
| 836 | + 'said_to_be_the_same_as', 'named_after', 'followed_by', 'substance_meronym', 'streak_color', 'instance_hypernym', |
| 837 | + 'twinned_administrative_body', 'continent', 'country', 'location_of_creation', 'material_used', 'month_of_the_year', |
| 838 | + 'applies_to_jurisdiction', 'work_location', 'member_of_political_party', 'country_of_citizenship', 'occupation', |
| 839 | + 'parent_taxon', 'taxonomic_type', 'member_of_category_domain', 'member_of_this_domain_-_topic', 'cause_of_death', |
| 840 | + 'field_of_work', 'place_of_death', 'place_of_birth', 'prime_factor', 'model_item', 'studied_by', 'has_parts_of_the_class', |
| 841 | + 'has_quality', 'used_by', 'has_cause', 'religion', 'killed_by', 'child', 'substance_holonym', 'basin_country', |
| 842 | + 'connects_with', 'from_narrative_universe', 'located_on_terrain_feature', 'location_of_discovery', |
| 843 | + 'diplomatic_relation', 'official_language', 'language_used', 'capital', 'member_of', |
| 844 | + 'contains_administrative_territorial_entity', 'shares_border_with', 'has_effect', 'medical_condition', 'ethnic_group', |
| 845 | + 'sport', 'indigenous_to', 'writing_system', 'noble_title', 'partially_coincident_with', 'immediate_cause_of', |
| 846 | + 'has_immediate_cause', 'lowest_point', 'follows', 'subject_has_role', 'grammatical_option_indicates', 'parent_astronomical_body', |
| 847 | + 'separated_from', 'place_of_burial', 'genre', 'participant', 'highest_point', 'located_in_or_next_to_body_of_water', 'location', |
| 848 | + 'found_in_taxon', 'conflict', 'child_astronomical_body', 'interested_in', 'writing_language', 'instrument', 'languages_spoken_written_or_signed', |
| 849 | + 'employer', 'educated_at', 'capital_of', 'possible_treatment', 'afflicts', 'symptoms', 'father', 'this_zoological_name_is_coordinate_with', 'taxon_synonym', |
| 850 | + 'measured_physical_quantity', 'has_grammatical_case', 'official_symbol', 'head_of_state', 'replaces', 'has_fruit_type', 'has_facet_polytope', |
| 851 | + 'studies', 'worshipped_by', 'based_on', 'depicts', 'residence', 'mouth_of_the_watercourse', 'author', 'main_subject', |
| 852 | + 'owner_of', 'discoverer_or_inventor', 'basic_form_of_government', 'anatomical_location', 'drug_used_for_treatment', 'hair_color', |
| 853 | + 'spouse', 'sibling', 'medical_condition_treated', 'manifestation_of', 'position_held', 'product_or_material_produced', |
| 854 | + 'territory_claimed_by', 'field_of_this_occupation', 'language_of_work_or_name', 'currency', 'office_held_by_head_of_government', |
| 855 | + 'permanent_duplicated_item', 'fabrication_method', 'risk_factor', 'headquarters_location', 'public_holiday', 'unmarried_partner', |
| 856 | + 'significant_event', 'manner_of_death', 'native_language', 'military_rank', 'award_received', 'source_of_energy', 'physically_interacts_with', |
| 857 | + 'inflows', 'lake_outflow', 'iconographic_symbol', 'military_branch', 'official_color', 'founded_by', 'calculated_from', 'has_natural_reservoir', |
| 858 | + 'replaced_by', 'movement', 'has_contributing_factor', 'spore_print_color', 'sex_or_gender', 'member_of_region_domain', 'member_of_this_domain_-_region', |
| 859 | + 'endemic_to', 'arterial_supply', 'day_in_year_for_periodic_occurrence', 'produced_by', 'owned_by', 'godparent', 'allegiance', 'main_food_source', |
| 860 | + 'develops_from', 'family', 'patron_saint', 'does_not_have_part', 'copyright_representative', 'location_of_formation', 'typically_sells', |
| 861 | + 'notable_work', 'tributary', 'characters', 'canonization_status', 'student_of', 'academic_degree', 'time_period', 'mountain_range', |
| 862 | + 'influenced_by', 'sexual_orientation', 'sexually_homologous_with', 'habitat', 'item_operated', 'medical_examinations', |
| 863 | + 'office_held_by_head_of_the_organization', 'original_combination', 'office_held_by_head_of_state', 'culture', 'feast_day', 'route_of_administration', 'type_locality_(geology)', |
| 864 | + 'located_in_time_zone', 'given_name', 'operator', 'produced_sound', 'designed_to_carry', 'side_effect', 'next_lower_rank', 'currency_symbol_description', |
| 865 | + 'central_bank/issuer', 'enclave_within', 'less_than', 'industry', 'exclave_of', 'significant_drug_interaction', 'foods_traditionally_associated', |
| 866 | + 'day_of_week', 'is_a_list_of', 'appointed_by', 'objective_of_project_or_action', 'conjugate_acid', 'conferred_by', 'cell_component', 'domain_of_saint_or_deity', |
| 867 | + 'place_of_detention', 'direction_relative_to_location', 'official_religion', 'legislative_body', 'shape', 'performer', 'target', 'organizer', |
| 868 | + 'occupant', 'has_grammatical_mood', 'has_tense', 'described_by_source', 'penalty', 'invasive_to', 'subsidiary', 'creator', 'discography', 'voice_type', |
| 869 | + 'uses_capitalization_for', 'linguistic_typology', 'official_residence', 'territory_overlaps', 'contributing_factor_of', 'encodes', 'professorship', |
| 870 | + 'encoded_by', 'biological_process', 'narrative_location', 'archives_at', 'measures', 'doctoral_student', 'dual_to', 'flower_color', 'convicted_of', |
| 871 | + 'first_aid_measures', 'place_of_origin_(switzerland)', 'destination_point', 'constellation', 'history_of_topic', 'parent_organization', 'lifestyle', |
| 872 | + 'legal_form', 'ancestral_home', 'terminus', 'original_language_of_film_or_tv_show', 'historic_county', 'member_of_usage_domain', 'member_of_this_domain_-_usage', |
| 873 | + 'honorific_prefix', 'next_higher_rank', 'lakes_on_river', 'origin_of_the_watercourse', 'participant_in', 'located_in_present-day_administrative_territorial_entity', |
| 874 | + 'pathogen_transmission_process', 'geography_of_topic', 'depicted_by', 'student', 'conjugate_base', 'host', 'solid_solution_series_with', 'contains', |
| 875 | + 'organization_directed_by_the_office_or_person', 'distribution_format', 'relative', 'mother', 'represents', 'sports_discipline_competed_in', 'victory', 'part_of_the_series', |
| 876 | + 'is_pollinated_by', 'authority', 'molecular_function', 'commemorates', 'basionym', 'temporal_range_start', 'powered_by', 'anthem', 'prerequisite', 'central_bank', |
| 877 | + 'of', 'developer', 'sponsor', 'has_works_in_the_collection', 'social_classification', 'contributor_to_the_creative_work_or_subject', 'voice_actor', 'chairperson', |
| 878 | + 'commissioned_by', 'manufacturer', 'anatomical_branch_of', 'foundational_text', 'appears_in_the_form_of', |
| 879 | + 'contains_settlement', 'dialect_of', 'base', 'has_vertex_figure', 'season_starts', 'members_have_occupation', |
| 880 | + 'has_anatomical_branch', 'does_not_have_quality', 'commander_of_(deprecated)', "topic's_main_category", |
| 881 | + 'measurement_scale', 'present_in_work', 'significant_person', 'gui_toolkit_or_framework', 'operating_area', |
| 882 | + 'wears', 'designated_as_terrorist_by', 'platform', 'has_boundary', 'derivative_work', 'greater_than', |
| 883 | + 'place_of_publication', 'offers_view_on', 'architectural_style', 'parent_of_this_hybrid_breed_or_cultivar', |
| 884 | + 'doctoral_advisor', 'by-product', 'signatory', 'production_statistics', 'codomain', 'domain', 'honorific_suffix', |
| 885 | + 'had_as_last_meal', 'birthday', 'historical_region', 'cause_of_destruction', 'people_or_cargo_transported', |
| 886 | + 'place_served_by_transport_hub', 'family_name', 'programming_paradigm', 'enemy_of', 'has_list', 'language_regulatory_body', |
| 887 | + 'incarnation_of', 'muscle_insertion', 'open_days', 'payment_types_accepted', 'member_of_military_unit', 'crosses', |
| 888 | + 'determination_method', 'publisher', 'by-product_of', 'statement_describes'] |
826 | 889 |
|
827 | 890 | def get_synset_relation(self, x, y, return_triples=False, strict=True): |
828 | 891 | """Get the relation between two synsets. |
|
0 commit comments