Migrating to Python-3.6, initial commit, changes were made by 2to3. (#160). Added pytest dependencies (related to #156)
nicolay-r · Aug 16, 2021 · b2ac8cf · b2ac8cf
1 parent 256736f
commit b2ac8cf
Show file tree

Hide file tree

Showing 169 changed files with 833 additions and 832 deletions.
diff --git a/arekit/common/context/terms_mapper.py b/arekit/common/context/terms_mapper.py
@@ -16,7 +16,7 @@ def iter_mapped(self, terms):
 
         for i, term in enumerate(terms):
 
-            if isinstance(term, unicode):
+            if isinstance(term, str):
                 m_term = self.map_word(i, term)
             elif isinstance(term, Token):
                 m_term = self.map_token(i, term)

diff --git a/arekit/common/embeddings/base.py b/arekit/common/embeddings/base.py
@@ -90,7 +90,7 @@ def __try_find_word_index_pair(self, word):
         returns: pair
             (processed_term, index)
         """
-        assert(isinstance(word, unicode))
+        assert(isinstance(word, str))
 
         has_index = self.__index_by_word[word] if word in self.__index_by_word else None
         word = word if has_index else None
@@ -119,12 +119,12 @@ def get_word_by_index(self, index):
         return self.__words[index]
 
     def try_find_index_by_word(self, word):
-        assert(isinstance(word, unicode))
+        assert(isinstance(word, str))
         _, index = self.__hadler_core(word)
         return index
 
     def try_find_index_by_plain_word(self, word):
-        assert(isinstance(word, unicode))
+        assert(isinstance(word, str))
         _, index = self.__hadler_core(word)
         return index
 
@@ -138,12 +138,12 @@ def _handler(self, word):
     # region overriden methods
 
     def __contains__(self, word):
-        assert(isinstance(word, unicode))
+        assert(isinstance(word, str))
         _, index = self.__hadler_core(word)
         return index is not None
 
     def __getitem__(self, word):
-        assert(isinstance(word, unicode))
+        assert(isinstance(word, str))
         _, index = self.__hadler_core(word)
         return self._matrix[index]
 

diff --git a/arekit/common/entities/base.py b/arekit/common/entities/base.py
@@ -1,8 +1,8 @@
 class Entity(object):
 
     def __init__(self, value, e_type, id_in_doc, group_index=None):
-        assert(isinstance(value, unicode) and len(value) > 0)
-        assert(isinstance(e_type, unicode) or e_type is None)
+        assert(isinstance(value, str) and len(value) > 0)
+        assert(isinstance(e_type, str) or e_type is None)
         assert(isinstance(group_index, int) or group_index is None)
         self.__value = value.lower()
         self.__id = id_in_doc

diff --git a/arekit/common/entities/collection.py b/arekit/common/entities/collection.py
@@ -51,7 +51,7 @@ def get_entity_by_index(self, index):
         return self.__entities[index]
 
     def try_get_entities(self, value, group_key):
-        assert(isinstance(value, unicode))
+        assert(isinstance(value, str))
 
         if group_key == self.KeyType.BY_SYNONYMS:
             key = self.__synonyms.get_synonym_group_index(value)

diff --git a/arekit/common/entities/formatters/str_rus_cased_fmt.py b/arekit/common/entities/formatters/str_rus_cased_fmt.py
@@ -11,24 +11,24 @@ class RussianEntitiesCasedFormatter(StringEntitiesFormatter):
 
     # Объект/Субъект
     obj_subj_cases_map = {
-        RussianCases.UNKN: [u'', u''],      # UNKN
-        RussianCases.NOM: [u'', u"ы"],      # именительный
-        RussianCases.GEN: [u'а', u'ов'],    # родительный
-        RussianCases.DAT: [u'y', u'ам'],    # дательный
-        RussianCases.ACC: [u'', u'ы'],      # винительный
-        RussianCases.INS: [u'ом', u'aми'],  # творительный
-        RussianCases.ABL: [u'e', u'ах']     # предложный
+        RussianCases.UNKN: ['', ''],      # UNKN
+        RussianCases.NOM: ['', "ы"],      # именительный
+        RussianCases.GEN: ['а', 'ов'],    # родительный
+        RussianCases.DAT: ['y', 'ам'],    # дательный
+        RussianCases.ACC: ['', 'ы'],      # винительный
+        RussianCases.INS: ['ом', 'aми'],  # творительный
+        RussianCases.ABL: ['e', 'ах']     # предложный
     }
 
     # Сущност
     entity_cases_map = {
-        RussianCases.UNKN: [u'ь', u'и'],     # UNKN
-        RussianCases.NOM: [u'ь', u"и"],      # именительный
-        RussianCases.GEN: [u'и', u'ей'],     # родительный
-        RussianCases.DAT: [u'и', u'ям'],     # дательный
-        RussianCases.ACC: [u'ь', u'и'],      # винительный
-        RussianCases.INS: [u'ью', u'ьями'],  # творительный
-        RussianCases.ABL: [u'и', u'ях']      # предложный
+        RussianCases.UNKN: ['ь', 'и'],     # UNKN
+        RussianCases.NOM: ['ь', "и"],      # именительный
+        RussianCases.GEN: ['и', 'ей'],     # родительный
+        RussianCases.DAT: ['и', 'ям'],     # дательный
+        RussianCases.ACC: ['ь', 'и'],      # винительный
+        RussianCases.INS: ['ью', 'ьями'],  # творительный
+        RussianCases.ABL: ['и', 'ях']      # предложный
     }
 
     def __init__(self, pos_tagger):
@@ -43,22 +43,22 @@ def to_string(self, original_value, entity_type):
         cases_map = None
 
         if (entity_type == EntityType.Object) or (entity_type == EntityType.SynonymObject):
-            template = u"объект"
+            template = "объект"
             cases_map = self.obj_subj_cases_map
         elif (entity_type == EntityType.Subject) or (entity_type == EntityType.SynonymSubject):
-            template = u"субъект"
+            template = "субъект"
             cases_map = self.obj_subj_cases_map
         elif entity_type == EntityType.Other:
-            template = u"сущност"
+            template = "сущност"
             cases_map = self.entity_cases_map
 
         return self.__get_correct_declention(value=original_value.Value,
                                              template=template,
                                              cases_map=cases_map)
 
     def __get_correct_declention(self, value, template, cases_map):
-        assert(isinstance(value, unicode))
-        assert(isinstance(template, unicode))
+        assert(isinstance(value, str))
+        assert(isinstance(template, str))
         assert(isinstance(cases_map, dict))
 
         num = self.__pos_tagger.get_term_number(value)

diff --git a/arekit/common/entities/formatters/str_rus_nocased_fmt.py b/arekit/common/entities/formatters/str_rus_nocased_fmt.py
@@ -9,8 +9,8 @@ def to_string(self, original_value, entity_type):
         assert(isinstance(entity_type, EntityType))
 
         if (entity_type == EntityType.Object) or (entity_type == EntityType.SynonymObject):
-            return u"объект"
+            return "объект"
         elif (entity_type == EntityType.Subject) or (entity_type == EntityType.SynonymSubject):
-            return u"субъект"
+            return "субъект"
         if entity_type == EntityType.Other:
-            return u"сущность"
+            return "сущность"
diff --git a/arekit/common/entities/formatters/str_simple_fmt.py b/arekit/common/entities/formatters/str_simple_fmt.py
@@ -15,10 +15,10 @@ def to_string(self, original_value, entity_type):
         assert(isinstance(entity_type, EntityType))
 
         if entity_type == EntityType.Other:
-            return u"e"
+            return "e"
         elif entity_type == EntityType.Object or entity_type == EntityType.SynonymObject:
-            return u"object"
+            return "object"
         elif entity_type == EntityType.Subject or entity_type == EntityType.SynonymSubject:
-            return u"subject"
+            return "subject"
 
         return None
diff --git a/arekit/common/entities/formatters/str_simple_sharp_prefixed_fmt.py b/arekit/common/entities/formatters/str_simple_sharp_prefixed_fmt.py
@@ -8,8 +8,8 @@ def to_string(self, original_value, entity_type):
         assert(isinstance(entity_type, EntityType))
 
         if (entity_type == EntityType.Object) or (entity_type == EntityType.SynonymObject):
-            return u"#O"
+            return "#O"
         elif (entity_type == EntityType.Subject) or (entity_type == EntityType.SynonymSubject):
-            return u"#S"
+            return "#S"
         elif entity_type == EntityType.Other:
-            return u"#E"
+            return "#E"
diff --git a/arekit/common/entities/formatters/str_simple_uppercase_fmt.py b/arekit/common/entities/formatters/str_simple_uppercase_fmt.py
@@ -10,11 +10,11 @@ def to_string(self, original_value, entity_type):
         assert(isinstance(entity_type, EntityType))
 
         if entity_type == EntityType.Other:
-            mask = u"ENTITY"
+            mask = "ENTITY"
         elif entity_type == EntityType.Subject or entity_type == EntityType.SynonymSubject:
-            mask = u"E_SUBJ"
+            mask = "E_SUBJ"
         elif entity_type == EntityType.Object or entity_type == EntityType.SynonymObject:
-            mask = u"E_OBJ"
+            mask = "E_OBJ"
         else:
             raise NotImplementedError()
 

diff --git a/arekit/common/entities/formatters/types.py b/arekit/common/entities/formatters/types.py
@@ -13,16 +13,16 @@ class EntityFormatterTypes(Enum):
 class EntityFormattersService:
 
     __names = {
-        u"rus-cased-fmt": EntityFormatterTypes.RussianCased,
-        u'rus-simple': EntityFormatterTypes.RussianSimple,
-        u'simple-uppercase': EntityFormatterTypes.SimpleUppercase,
-        u'simple': EntityFormatterTypes.Simple,
-        u'sharp-simple': EntityFormatterTypes.SimpleSharpPrefixed,
+        "rus-cased-fmt": EntityFormatterTypes.RussianCased,
+        'rus-simple': EntityFormatterTypes.RussianSimple,
+        'simple-uppercase': EntityFormatterTypes.SimpleUppercase,
+        'simple': EntityFormatterTypes.Simple,
+        'sharp-simple': EntityFormatterTypes.SimpleSharpPrefixed,
     }
 
     @staticmethod
     def __iter_supported_names():
-        return iter(EntityFormattersService.__names.keys())
+        return iter(list(EntityFormattersService.__names.keys()))
 
     @staticmethod
     def get_type_by_name(name):

diff --git a/arekit/common/evaluation/evaluators/base.py b/arekit/common/evaluation/evaluators/base.py
@@ -61,8 +61,8 @@ def __iter_diff_core(self, etalon_opins, test_opins):
             if self.__eval_mode == EvaluationModes.Classification:
                 # That could not be possible, since we perform
                 # classification of already provided opinions.
-                raise Exception(u"Opinion of test collection (`{s}`->`{t}`) was not "
-                                u"found in etalon collection!".format(s=o_test.SourceValue,
+                raise Exception("Opinion of test collection (`{s}`->`{t}`) was not "
+                                "found in etalon collection!".format(s=o_test.SourceValue,
                                                                       t=o_test.TargetValue))
             elif self.__eval_mode == EvaluationModes.Extraction:
                 yield [o_test, None, o_test.Sentiment]
@@ -86,7 +86,7 @@ def _check_is_supported(self, label, is_label_supported):
             return True
 
         if not is_label_supported(label):
-            raise Exception(u"Label \"{label}\" is not supported by {e}".format(
+            raise Exception("Label \"{label}\" is not supported by {e}".format(
                 label=label_to_str(label),
                 e=type(self).__name__))
 

diff --git a/arekit/common/evaluation/evaluators/cmp_table.py b/arekit/common/evaluation/evaluators/cmp_table.py
@@ -39,18 +39,18 @@ def create_template_df(rows_count):
                                    DocumentCompareTable.C_CMP])
 
         # filling with blank rows.
-        df[DocumentCompareTable.C_ID] = range(rows_count)
+        df[DocumentCompareTable.C_ID] = list(range(rows_count))
         df.set_index(DocumentCompareTable.C_ID, inplace=True)
 
         return df
 
     @classmethod
     def load(cls, filepath):
-        assert(isinstance(filepath, unicode))
+        assert(isinstance(filepath, str))
         return cls(cmp_table=pd.DataFrame.from_csv(filepath))
 
     def save(self, filepath):
-        assert(isinstance(filepath, unicode))
+        assert(isinstance(filepath, str))
         self.__cmp_table.to_csv(filepath)
 
     def filter_result_column_by_label(self, label):

diff --git a/arekit/common/evaluation/results/base.py b/arekit/common/evaluation/results/base.py
@@ -31,15 +31,15 @@ def is_label_supported(self, label):
         return label in self.__supported_labels
 
     def get_result_by_metric(self, metric_name):
-        assert(isinstance(metric_name, unicode))
+        assert(isinstance(metric_name, str))
         return self._total_result[metric_name]
 
     def iter_total_by_param_results(self):
         assert(self._total_result is not None)
-        return self._total_result.iteritems()
+        return iter(self._total_result.items())
 
     def iter_dataframe_cmp_tables(self):
-        return self._cmp_tables.iteritems()
+        return iter(self._cmp_tables.items())
 
     def reg_doc(self, cmp_pair, cmp_table):
         """ Registering cmp_table.

diff --git a/arekit/common/experiment/annot/base.py b/arekit/common/experiment/annot/base.py
@@ -28,8 +28,8 @@ def __iter_annotated_collections(self, data_type, filter_func, doc_ops, opin_ops
         assert(isinstance(doc_ops, DocumentOperations))
         assert(isinstance(opin_ops, OpinionOperations))
 
-        docs_to_annot_list = filter(filter_func,
-                                    doc_ops.iter_doc_ids_to_annotate())
+        docs_to_annot_list = list(filter(filter_func,
+                                    doc_ops.iter_doc_ids_to_annotate()))
 
         if len(docs_to_annot_list) == 0:
             logger.info("[{}]: Nothing to annotate".format(data_type))

diff --git a/arekit/common/experiment/annot/single_label.py b/arekit/common/experiment/annot/single_label.py
@@ -35,10 +35,10 @@ def __init__(self, dist_in_terms_bound, label_instance, dist_in_sents=0, ignored
     def __create_key_by_entity_pair(e1, e2):
         assert(isinstance(e1, Entity))
         assert(isinstance(e2, Entity))
-        return u"{}_{}".format(e1.IdInDocument, e2.IdInDocument)
+        return "{}_{}".format(e1.IdInDocument, e2.IdInDocument)
 
     def __is_ignored_entity_value(self, entity_value):
-        assert(isinstance(entity_value, unicode))
+        assert(isinstance(entity_value, str))
         return entity_value in self.__ignored_entity_values
 
     def __iter_opinions_between_entities(self, relevant_pairs, entities_collection):

diff --git a/arekit/common/experiment/cv/base.py b/arekit/common/experiment/cv/base.py
@@ -11,7 +11,7 @@ def __init__(self, supported_data_types, doc_ids_to_fold, cv_count, splitter):
         assert(isinstance(splitter, CrossValidationSplitter))
 
         if len(supported_data_types) > 2:
-            raise NotImplementedError(u"Experiments with such amount of data-types are not supported!")
+            raise NotImplementedError("Experiments with such amount of data-types are not supported!")
 
         super(TwoClassCVFolding, self).__init__(doc_ids_to_fold=doc_ids_to_fold,
                                                 supported_data_types=supported_data_types)
@@ -27,7 +27,7 @@ def CVCount(self):
 
     @property
     def Name(self):
-        return u"cv{0}".format(self.__cv_count)
+        return "cv{0}".format(self.__cv_count)
 
     # endregion
 
@@ -47,7 +47,7 @@ def fold_doc_ids_set(self):
             }
 
         if self.__splitter is None:
-            raise NotImplementedError(u"Splitter has not been intialized!")
+            raise NotImplementedError("Splitter has not been intialized!")
 
         it = self.__splitter.items_to_cv_pairs(doc_ids=set(doc_ids),
                                                cv_count=self.__cv_count)
@@ -70,6 +70,6 @@ def iter_states(self):
     def get_current_state(self):
         """ Providing current iteration index.
         """
-        return unicode(self._iteration_index)
+        return str(self._iteration_index)
 
     # endregion
diff --git a/arekit/common/experiment/cv/splitters/default.py b/arekit/common/experiment/cv/splitters/default.py
@@ -42,7 +42,7 @@ def items_to_cv_pairs(self, doc_ids, cv_count):
         chunks = self.__chunk_it(doc_ids, cv_count)
 
         for test_index, chunk in enumerate(chunks):
-            train_indices = range(len(chunks))
+            train_indices = list(range(len(chunks)))
             train_indices.remove(test_index)
 
             large = [v for train_index in train_indices for v in chunks[train_index]]

diff --git a/arekit/common/experiment/data/serializing.py b/arekit/common/experiment/data/serializing.py
@@ -15,7 +15,7 @@ def __init__(self, label_scaler, annot, stemmer):
         self.__label_scaler = label_scaler
 
         if self.LabelsCount != annot.LabelsCount:
-            raise Exception(u"Label scaler and annotator are incompatible due to differs in labels count!")
+            raise Exception("Label scaler and annotator are incompatible due to differs in labels count!")
 
         self.__annot = annot
 

diff --git a/arekit/common/experiment/extract/opinions.py b/arekit/common/experiment/extract/opinions.py
@@ -22,7 +22,7 @@ def __iter_linked_text_opinion_lists(news, opin_ops, data_type, filter_text_opin
         linked_text_opinions = news.extract_linked_text_opinions(opinion)
         assert(linked_text_opinions, LinkedTextOpinionsWrapper)
 
-        filtered_text_opinions = filter(filter_text_opinion_func, linked_text_opinions)
+        filtered_text_opinions = list(filter(filter_text_opinion_func, linked_text_opinions))
 
         if len(filtered_text_opinions) == 0:
             continue

diff --git a/arekit/common/experiment/folding/fixed.py b/arekit/common/experiment/folding/fixed.py
@@ -12,7 +12,7 @@ def __init__(self, doc_to_dtype_func, doc_ids_to_fold, supported_data_types):
 
     @property
     def Name(self):
-        return u"fixed"
+        return "fixed"
 
     def fold_doc_ids_set(self):
 
@@ -29,4 +29,4 @@ def fold_doc_ids_set(self):
     def get_current_state(self):
         """ Returns in order to be compatible with cv-based experiment format.
         """
-        return u"0"
+        return "0"
diff --git a/arekit/common/experiment/folding/nofold.py b/arekit/common/experiment/folding/nofold.py
@@ -7,14 +7,14 @@ class NoFolding(BaseExperimentDataFolding):
 
     def __init__(self, doc_ids_to_fold, supported_data_types):
         if len(supported_data_types) > 1:
-            raise NotImplementedError(u"Experiments with such amount of data-types are not supported!")
+            raise NotImplementedError("Experiments with such amount of data-types are not supported!")
 
         super(NoFolding, self).__init__(doc_ids_to_fold=doc_ids_to_fold,
                                         supported_data_types=supported_data_types)
 
     @property
     def Name(self):
-        return u"na"
+        return "na"
 
     def fold_doc_ids_set(self):
         return {
@@ -24,4 +24,4 @@ def fold_doc_ids_set(self):
     def get_current_state(self):
         """ Returns in order to be compatible with cv-based experiment format.
         """
-        return u"0"
+        return "0"