Skip to content

Commit

Permalink
cleaned up formatting slightly
Browse files Browse the repository at this point in the history
  • Loading branch information
ZachEichen committed Jul 12, 2021
1 parent 6878381 commit fd52cf2
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 49 deletions.
44 changes: 22 additions & 22 deletions text_extensions_for_pandas/io/conll.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ def _doc_to_df(
ret["line_num"] = pd.Series(doc_line_nums)
if conll_u and "head" in column_names:
ret = ret.astype({"head": "Int64"}, errors="ignore")
ret.loc[ret['head'] == -1, 'head'] = pd.NA
ret.loc[ret["head"] == -1, "head"] = pd.NA
return ret


Expand Down Expand Up @@ -1302,25 +1302,25 @@ def maybe_download_dataset_data(
target_dir: str, document_url: str, alternate_name: str = None
) -> Union[str, List[str]]:
"""
If the file found at the github url is not found in the target directory,
downloads it from the github url, and saves it to that place in downloads.
Returns the path to the file. If a zip archive is downloaded, only files that are not already in the target
directory will be fetched, and if an alternate_name is given only that file will be operated on.
Note if a Zip archive is downloaded it will be unpacked so verify that the url being used is safe.
:param target_dir: Directory where this function should write the document
:param document_url: url from which to download the document. If no alternate name is specified,
it is assumed that the string after the last slash is the name of the file.
:param alternate_name: if given, the name of the file that is checked in the target directory,
as well as what is used to save the file if no such file is found. If a zip file is downloaded, and a file of this
name exists in the archive, only it will be extracted.
:returns: the path to the file, or None if downloading was not successful
If the file found at the github url is not found in the target directory,
    downloads it from the github url, and saves it to that place in downloads.
Returns the path to the file. If a zip archive is downloaded, only files that are not already in the target
directory will be fetched, and if an alternate_name is given only that file will be operated on.
Note if a Zip archive is downloaded it will be unpacked so verify that the url being used is safe.
:param target_dir: Directory where this function should write the document
    :param document_url: url from which to download the document. If no alternate name is specified,
it is assumed that the string after the last slash is the name of the file.
:param alternate_name: if given, the name of the file that is checked in the target directory,
as well as what is used to save the file if no such file is found. If a zip file is downloaded, and a file of this
    name exists in the archive, only it will be extracted.
:returns: the path to the file, or None if downloading was not successful
"""
file_name = (
alternate_name if alternate_name is not None else document_url.split("/")[-1]
)
full_path = target_dir +'/' + file_name
full_path = target_dir + "/" + file_name
# if no directory exists, create one
if not os.path.exists(target_dir):
os.mkdir(target_dir)
Expand All @@ -1329,13 +1329,13 @@ def maybe_download_dataset_data(
if document_url.split(".")[-1] == "zip" and (
alternate_name is None or not os.path.exists(full_path)
):
# if we have a zip file already, don't re-download it
zipPath = target_dir +'/'+ document_url.split("/")[-1]
# if we have a zip file already, don't re-download it
zipPath = target_dir + "/" + document_url.split("/")[-1]
if not os.path.exists(zipPath):
data = requests.get(document_url)
open(zipPath, "wb").write(data.content)
# if need be, extract the zipfile documents

# if need be, extract the zipfile documents
with ZipFile(zipPath, "r") as zipf:
fnames = zipf.namelist()
if alternate_name is not None and alternate_name in fnames:
Expand All @@ -1345,9 +1345,9 @@ def maybe_download_dataset_data(
if not os.path.exists(target_dir + fname):
zipf.extract(fname, target_dir)
if len(fnames) == 1:
full_path = target_dir +'/'+ fnames[0]
full_path = target_dir + "/" + fnames[0]
else:
return [target_dir + '/'+ fname for fname in fnames]
return [target_dir + "/" + fname for fname in fnames]

# regular logic
elif not os.path.exists(full_path):
Expand Down
54 changes: 27 additions & 27 deletions text_extensions_for_pandas/io/test_conll.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,40 +736,40 @@ def test_compute_accuracy(self):

def test_maybe_download_dataset(self):
base_dir = "test_data/io/test_conll"
ewt_dir = base_dir + '/ewt'
conll9_dir = base_dir + '/conll9'
ewt_dir = base_dir + "/ewt"
conll9_dir = base_dir + "/conll9"
ewt_url = "https://github.com/UniversalDependencies/UD_English-EWT/blob/master/en_ewt-ud-dev.conllu"
conll_09_test_data_url = 'https://ufal.mff.cuni.cz/conll2009-st/trial/CoNLL2009-ST-English-trial.zip'

#test download of file
val = maybe_download_dataset_data(ewt_dir,ewt_url)
self.assertEqual(val,ewt_dir + '/en_ewt-ud-dev.conllu')
conll_09_test_data_url = (
"https://ufal.mff.cuni.cz/conll2009-st/trial/CoNLL2009-ST-English-trial.zip"
)

# test download of file
val = maybe_download_dataset_data(ewt_dir, ewt_url)
self.assertEqual(val, ewt_dir + "/en_ewt-ud-dev.conllu")
self.assertTrue(os.path.isdir(ewt_dir))
self.assertTrue(os.path.isfile(ewt_dir + '/en_ewt-ud-dev.conllu'))
#test download of
val = maybe_download_dataset_data(ewt_dir,ewt_url,alternate_name="dev.conllu")
self.assertEqual(val,ewt_dir + '/dev.conllu')
self.assertTrue(os.path.isfile(ewt_dir + "/en_ewt-ud-dev.conllu"))
# test download of
val = maybe_download_dataset_data(ewt_dir, ewt_url, alternate_name="dev.conllu")
self.assertEqual(val, ewt_dir + "/dev.conllu")
self.assertTrue(os.path.isdir(ewt_dir))
self.assertTrue(os.path.isfile(ewt_dir + '/dev.conllu'))
self.assertTrue(os.path.isfile(ewt_dir + "/dev.conllu"))
# check we didn't overwrite the last file
self.assertTrue(os.path.isfile(ewt_dir + '/en_ewt-ud-dev.conllu'))

self.assertTrue(os.path.isfile(ewt_dir + "/en_ewt-ud-dev.conllu"))

#test zip
conll_9_file = conll9_dir + '/CoNLL2009-ST-English-trial.txt'
val = maybe_download_dataset_data(conll9_dir,conll_09_test_data_url)
self.assertEqual(val,conll_9_file)
# test zip
conll_9_file = conll9_dir + "/CoNLL2009-ST-English-trial.txt"
val = maybe_download_dataset_data(conll9_dir, conll_09_test_data_url)
self.assertEqual(val, conll_9_file)
self.assertTrue(os.path.isdir(conll9_dir))
self.assertTrue(os.path.isfile(conll_9_file))
#verify we don't double download for zips
os.remove(conll9_dir + '/CoNLL2009-ST-English-trial.zip')
maybe_download_dataset_data(conll9_dir,
conll_09_test_data_url,
alternate_name='/CoNLL2009-ST-English-trial.txt')
self.assertFalse(os.path.exists(conll9_dir + 'CoNLL2009-ST-English-trial.zip'))



# verify we don't double download for zips
os.remove(conll9_dir + "/CoNLL2009-ST-English-trial.zip")
maybe_download_dataset_data(
conll9_dir,
conll_09_test_data_url,
alternate_name="/CoNLL2009-ST-English-trial.txt",
)
self.assertFalse(os.path.exists(conll9_dir + "CoNLL2009-ST-English-trial.zip"))


if __name__ == "__main__":
Expand Down

0 comments on commit fd52cf2

Please sign in to comment.