Skip to content

Commit

Permalink
Migrating to Python-3.6, initial commit, changes were made by 2to3. (#160). Added pytest dependencies (related to #156)
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Aug 16, 2021
1 parent 256736f commit b2ac8cf
Show file tree
Hide file tree
Showing 169 changed files with 833 additions and 832 deletions.
2 changes: 1 addition & 1 deletion arekit/common/context/terms_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def iter_mapped(self, terms):

for i, term in enumerate(terms):

if isinstance(term, unicode):
if isinstance(term, str):
m_term = self.map_word(i, term)
elif isinstance(term, Token):
m_term = self.map_token(i, term)
Expand Down
10 changes: 5 additions & 5 deletions arekit/common/embeddings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def __try_find_word_index_pair(self, word):
returns: pair
(processed_term, index)
"""
assert(isinstance(word, unicode))
assert(isinstance(word, str))

has_index = self.__index_by_word[word] if word in self.__index_by_word else None
word = word if has_index else None
Expand Down Expand Up @@ -119,12 +119,12 @@ def get_word_by_index(self, index):
return self.__words[index]

def try_find_index_by_word(self, word):
assert(isinstance(word, unicode))
assert(isinstance(word, str))
_, index = self.__hadler_core(word)
return index

def try_find_index_by_plain_word(self, word):
assert(isinstance(word, unicode))
assert(isinstance(word, str))
_, index = self.__hadler_core(word)
return index

Expand All @@ -138,12 +138,12 @@ def _handler(self, word):
# region overriden methods

def __contains__(self, word):
assert(isinstance(word, unicode))
assert(isinstance(word, str))
_, index = self.__hadler_core(word)
return index is not None

def __getitem__(self, word):
assert(isinstance(word, unicode))
assert(isinstance(word, str))
_, index = self.__hadler_core(word)
return self._matrix[index]

Expand Down
4 changes: 2 additions & 2 deletions arekit/common/entities/base.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
class Entity(object):

def __init__(self, value, e_type, id_in_doc, group_index=None):
assert(isinstance(value, unicode) and len(value) > 0)
assert(isinstance(e_type, unicode) or e_type is None)
assert(isinstance(value, str) and len(value) > 0)
assert(isinstance(e_type, str) or e_type is None)
assert(isinstance(group_index, int) or group_index is None)
self.__value = value.lower()
self.__id = id_in_doc
Expand Down
2 changes: 1 addition & 1 deletion arekit/common/entities/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def get_entity_by_index(self, index):
return self.__entities[index]

def try_get_entities(self, value, group_key):
assert(isinstance(value, unicode))
assert(isinstance(value, str))

if group_key == self.KeyType.BY_SYNONYMS:
key = self.__synonyms.get_synonym_group_index(value)
Expand Down
38 changes: 19 additions & 19 deletions arekit/common/entities/formatters/str_rus_cased_fmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,24 @@ class RussianEntitiesCasedFormatter(StringEntitiesFormatter):

# Объект/Субъект
obj_subj_cases_map = {
RussianCases.UNKN: [u'', u''], # UNKN
RussianCases.NOM: [u'', u"ы"], # именительный
RussianCases.GEN: [u'а', u'ов'], # родительный
RussianCases.DAT: [u'y', u'ам'], # дательный
RussianCases.ACC: [u'', u'ы'], # винительный
RussianCases.INS: [u'ом', u'aми'], # творительный
RussianCases.ABL: [u'e', u'ах'] # предложный
RussianCases.UNKN: ['', ''], # UNKN
RussianCases.NOM: ['', "ы"], # именительный
RussianCases.GEN: ['а', 'ов'], # родительный
RussianCases.DAT: ['y', 'ам'], # дательный
RussianCases.ACC: ['', 'ы'], # винительный
RussianCases.INS: ['ом', 'aми'], # творительный
RussianCases.ABL: ['e', 'ах'] # предложный
}

# Сущност
entity_cases_map = {
RussianCases.UNKN: [u'ь', u'и'], # UNKN
RussianCases.NOM: [u'ь', u"и"], # именительный
RussianCases.GEN: [u'и', u'ей'], # родительный
RussianCases.DAT: [u'и', u'ям'], # дательный
RussianCases.ACC: [u'ь', u'и'], # винительный
RussianCases.INS: [u'ью', u'ьями'], # творительный
RussianCases.ABL: [u'и', u'ях'] # предложный
RussianCases.UNKN: ['ь', 'и'], # UNKN
RussianCases.NOM: ['ь', "и"], # именительный
RussianCases.GEN: ['и', 'ей'], # родительный
RussianCases.DAT: ['и', 'ям'], # дательный
RussianCases.ACC: ['ь', 'и'], # винительный
RussianCases.INS: ['ью', 'ьями'], # творительный
RussianCases.ABL: ['и', 'ях'] # предложный
}

def __init__(self, pos_tagger):
Expand All @@ -43,22 +43,22 @@ def to_string(self, original_value, entity_type):
cases_map = None

if (entity_type == EntityType.Object) or (entity_type == EntityType.SynonymObject):
template = u"объект"
template = "объект"
cases_map = self.obj_subj_cases_map
elif (entity_type == EntityType.Subject) or (entity_type == EntityType.SynonymSubject):
template = u"субъект"
template = "субъект"
cases_map = self.obj_subj_cases_map
elif entity_type == EntityType.Other:
template = u"сущност"
template = "сущност"
cases_map = self.entity_cases_map

return self.__get_correct_declention(value=original_value.Value,
template=template,
cases_map=cases_map)

def __get_correct_declention(self, value, template, cases_map):
assert(isinstance(value, unicode))
assert(isinstance(template, unicode))
assert(isinstance(value, str))
assert(isinstance(template, str))
assert(isinstance(cases_map, dict))

num = self.__pos_tagger.get_term_number(value)
Expand Down
6 changes: 3 additions & 3 deletions arekit/common/entities/formatters/str_rus_nocased_fmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ def to_string(self, original_value, entity_type):
assert(isinstance(entity_type, EntityType))

if (entity_type == EntityType.Object) or (entity_type == EntityType.SynonymObject):
return u"объект"
return "объект"
elif (entity_type == EntityType.Subject) or (entity_type == EntityType.SynonymSubject):
return u"субъект"
return "субъект"
if entity_type == EntityType.Other:
return u"сущность"
return "сущность"
6 changes: 3 additions & 3 deletions arekit/common/entities/formatters/str_simple_fmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ def to_string(self, original_value, entity_type):
assert(isinstance(entity_type, EntityType))

if entity_type == EntityType.Other:
return u"e"
return "e"
elif entity_type == EntityType.Object or entity_type == EntityType.SynonymObject:
return u"object"
return "object"
elif entity_type == EntityType.Subject or entity_type == EntityType.SynonymSubject:
return u"subject"
return "subject"

return None
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ def to_string(self, original_value, entity_type):
assert(isinstance(entity_type, EntityType))

if (entity_type == EntityType.Object) or (entity_type == EntityType.SynonymObject):
return u"#O"
return "#O"
elif (entity_type == EntityType.Subject) or (entity_type == EntityType.SynonymSubject):
return u"#S"
return "#S"
elif entity_type == EntityType.Other:
return u"#E"
return "#E"
6 changes: 3 additions & 3 deletions arekit/common/entities/formatters/str_simple_uppercase_fmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ def to_string(self, original_value, entity_type):
assert(isinstance(entity_type, EntityType))

if entity_type == EntityType.Other:
mask = u"ENTITY"
mask = "ENTITY"
elif entity_type == EntityType.Subject or entity_type == EntityType.SynonymSubject:
mask = u"E_SUBJ"
mask = "E_SUBJ"
elif entity_type == EntityType.Object or entity_type == EntityType.SynonymObject:
mask = u"E_OBJ"
mask = "E_OBJ"
else:
raise NotImplementedError()

Expand Down
12 changes: 6 additions & 6 deletions arekit/common/entities/formatters/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ class EntityFormatterTypes(Enum):
class EntityFormattersService:

__names = {
u"rus-cased-fmt": EntityFormatterTypes.RussianCased,
u'rus-simple': EntityFormatterTypes.RussianSimple,
u'simple-uppercase': EntityFormatterTypes.SimpleUppercase,
u'simple': EntityFormatterTypes.Simple,
u'sharp-simple': EntityFormatterTypes.SimpleSharpPrefixed,
"rus-cased-fmt": EntityFormatterTypes.RussianCased,
'rus-simple': EntityFormatterTypes.RussianSimple,
'simple-uppercase': EntityFormatterTypes.SimpleUppercase,
'simple': EntityFormatterTypes.Simple,
'sharp-simple': EntityFormatterTypes.SimpleSharpPrefixed,
}

@staticmethod
def __iter_supported_names():
return iter(EntityFormattersService.__names.keys())
return iter(list(EntityFormattersService.__names.keys()))

@staticmethod
def get_type_by_name(name):
Expand Down
6 changes: 3 additions & 3 deletions arekit/common/evaluation/evaluators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ def __iter_diff_core(self, etalon_opins, test_opins):
if self.__eval_mode == EvaluationModes.Classification:
# That could not be possible, since we perform
# classification of already provided opinions.
raise Exception(u"Opinion of test collection (`{s}`->`{t}`) was not "
u"found in etalon collection!".format(s=o_test.SourceValue,
raise Exception("Opinion of test collection (`{s}`->`{t}`) was not "
"found in etalon collection!".format(s=o_test.SourceValue,
t=o_test.TargetValue))
elif self.__eval_mode == EvaluationModes.Extraction:
yield [o_test, None, o_test.Sentiment]
Expand All @@ -86,7 +86,7 @@ def _check_is_supported(self, label, is_label_supported):
return True

if not is_label_supported(label):
raise Exception(u"Label \"{label}\" is not supported by {e}".format(
raise Exception("Label \"{label}\" is not supported by {e}".format(
label=label_to_str(label),
e=type(self).__name__))

Expand Down
6 changes: 3 additions & 3 deletions arekit/common/evaluation/evaluators/cmp_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,18 @@ def create_template_df(rows_count):
DocumentCompareTable.C_CMP])

# filling with blank rows.
df[DocumentCompareTable.C_ID] = range(rows_count)
df[DocumentCompareTable.C_ID] = list(range(rows_count))
df.set_index(DocumentCompareTable.C_ID, inplace=True)

return df

@classmethod
def load(cls, filepath):
assert(isinstance(filepath, unicode))
assert(isinstance(filepath, str))
return cls(cmp_table=pd.DataFrame.from_csv(filepath))

def save(self, filepath):
assert(isinstance(filepath, unicode))
assert(isinstance(filepath, str))
self.__cmp_table.to_csv(filepath)

def filter_result_column_by_label(self, label):
Expand Down
6 changes: 3 additions & 3 deletions arekit/common/evaluation/results/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ def is_label_supported(self, label):
return label in self.__supported_labels

def get_result_by_metric(self, metric_name):
assert(isinstance(metric_name, unicode))
assert(isinstance(metric_name, str))
return self._total_result[metric_name]

def iter_total_by_param_results(self):
assert(self._total_result is not None)
return self._total_result.iteritems()
return iter(self._total_result.items())

def iter_dataframe_cmp_tables(self):
return self._cmp_tables.iteritems()
return iter(self._cmp_tables.items())

def reg_doc(self, cmp_pair, cmp_table):
""" Registering cmp_table.
Expand Down
4 changes: 2 additions & 2 deletions arekit/common/experiment/annot/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def __iter_annotated_collections(self, data_type, filter_func, doc_ops, opin_ops
assert(isinstance(doc_ops, DocumentOperations))
assert(isinstance(opin_ops, OpinionOperations))

docs_to_annot_list = filter(filter_func,
doc_ops.iter_doc_ids_to_annotate())
docs_to_annot_list = list(filter(filter_func,
doc_ops.iter_doc_ids_to_annotate()))

if len(docs_to_annot_list) == 0:
logger.info("[{}]: Nothing to annotate".format(data_type))
Expand Down
4 changes: 2 additions & 2 deletions arekit/common/experiment/annot/single_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ def __init__(self, dist_in_terms_bound, label_instance, dist_in_sents=0, ignored
def __create_key_by_entity_pair(e1, e2):
assert(isinstance(e1, Entity))
assert(isinstance(e2, Entity))
return u"{}_{}".format(e1.IdInDocument, e2.IdInDocument)
return "{}_{}".format(e1.IdInDocument, e2.IdInDocument)

def __is_ignored_entity_value(self, entity_value):
assert(isinstance(entity_value, unicode))
assert(isinstance(entity_value, str))
return entity_value in self.__ignored_entity_values

def __iter_opinions_between_entities(self, relevant_pairs, entities_collection):
Expand Down
8 changes: 4 additions & 4 deletions arekit/common/experiment/cv/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self, supported_data_types, doc_ids_to_fold, cv_count, splitter):
assert(isinstance(splitter, CrossValidationSplitter))

if len(supported_data_types) > 2:
raise NotImplementedError(u"Experiments with such amount of data-types are not supported!")
raise NotImplementedError("Experiments with such amount of data-types are not supported!")

super(TwoClassCVFolding, self).__init__(doc_ids_to_fold=doc_ids_to_fold,
supported_data_types=supported_data_types)
Expand All @@ -27,7 +27,7 @@ def CVCount(self):

@property
def Name(self):
return u"cv{0}".format(self.__cv_count)
return "cv{0}".format(self.__cv_count)

# endregion

Expand All @@ -47,7 +47,7 @@ def fold_doc_ids_set(self):
}

if self.__splitter is None:
raise NotImplementedError(u"Splitter has not been intialized!")
raise NotImplementedError("Splitter has not been intialized!")

it = self.__splitter.items_to_cv_pairs(doc_ids=set(doc_ids),
cv_count=self.__cv_count)
Expand All @@ -70,6 +70,6 @@ def iter_states(self):
def get_current_state(self):
""" Providing current iteration index.
"""
return unicode(self._iteration_index)
return str(self._iteration_index)

# endregion
2 changes: 1 addition & 1 deletion arekit/common/experiment/cv/splitters/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def items_to_cv_pairs(self, doc_ids, cv_count):
chunks = self.__chunk_it(doc_ids, cv_count)

for test_index, chunk in enumerate(chunks):
train_indices = range(len(chunks))
train_indices = list(range(len(chunks)))
train_indices.remove(test_index)

large = [v for train_index in train_indices for v in chunks[train_index]]
Expand Down
2 changes: 1 addition & 1 deletion arekit/common/experiment/data/serializing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(self, label_scaler, annot, stemmer):
self.__label_scaler = label_scaler

if self.LabelsCount != annot.LabelsCount:
raise Exception(u"Label scaler and annotator are incompatible due to differs in labels count!")
raise Exception("Label scaler and annotator are incompatible due to differs in labels count!")

self.__annot = annot

Expand Down
2 changes: 1 addition & 1 deletion arekit/common/experiment/extract/opinions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __iter_linked_text_opinion_lists(news, opin_ops, data_type, filter_text_opin
linked_text_opinions = news.extract_linked_text_opinions(opinion)
assert(linked_text_opinions, LinkedTextOpinionsWrapper)

filtered_text_opinions = filter(filter_text_opinion_func, linked_text_opinions)
filtered_text_opinions = list(filter(filter_text_opinion_func, linked_text_opinions))

if len(filtered_text_opinions) == 0:
continue
Expand Down
4 changes: 2 additions & 2 deletions arekit/common/experiment/folding/fixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def __init__(self, doc_to_dtype_func, doc_ids_to_fold, supported_data_types):

@property
def Name(self):
return u"fixed"
return "fixed"

def fold_doc_ids_set(self):

Expand All @@ -29,4 +29,4 @@ def fold_doc_ids_set(self):
def get_current_state(self):
""" Returns in order to be compatible with cv-based experiment format.
"""
return u"0"
return "0"
6 changes: 3 additions & 3 deletions arekit/common/experiment/folding/nofold.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ class NoFolding(BaseExperimentDataFolding):

def __init__(self, doc_ids_to_fold, supported_data_types):
if len(supported_data_types) > 1:
raise NotImplementedError(u"Experiments with such amount of data-types are not supported!")
raise NotImplementedError("Experiments with such amount of data-types are not supported!")

super(NoFolding, self).__init__(doc_ids_to_fold=doc_ids_to_fold,
supported_data_types=supported_data_types)

@property
def Name(self):
return u"na"
return "na"

def fold_doc_ids_set(self):
return {
Expand All @@ -24,4 +24,4 @@ def fold_doc_ids_set(self):
def get_current_state(self):
""" Returns in order to be compatible with cv-based experiment format.
"""
return u"0"
return "0"
Loading

0 comments on commit b2ac8cf

Please sign in to comment.