Skip to content

Commit

Permalink
Merge pull request piskvorky#3459 from Holmes5/httpsfix
Browse files Browse the repository at this point in the history
Replace HTTP with HTTPS in enwiki URLs
  • Loading branch information
piskvorky committed Apr 29, 2023
2 parents 525f67a + 5207fab commit 4ca0653
Show file tree
Hide file tree
Showing 8 changed files with 12 additions and 12 deletions.
8 changes: 4 additions & 4 deletions docs/notebooks/Word2Vec_FastText_Comparison.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"--2019-05-12 19:40:14-- http://mattmahoney.net/dc/enwik9.zip\n",
"--2019-05-12 19:40:14-- https://mattmahoney.net/dc/enwik9.zip\n",
"Resolving mattmahoney.net (mattmahoney.net)... 67.195.197.75\n",
"Connecting to mattmahoney.net (mattmahoney.net)|67.195.197.75|:80... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
Expand All @@ -51,7 +51,7 @@
"\n",
"2019-05-12 19:50:17 (247 KB/s) - Connection closed at byte 152553031. Retrying.\n",
"\n",
"--2019-05-12 19:50:18-- (try: 2) http://mattmahoney.net/dc/enwik9.zip\n",
"--2019-05-12 19:50:18-- (try: 2) https://mattmahoney.net/dc/enwik9.zip\n",
"Connecting to mattmahoney.net (mattmahoney.net)|67.195.197.75|:80... connected.\n",
"HTTP request sent, awaiting response... 206 Partial Content\n",
"Length: 322592222 (308M), 170039191 (162M) remaining [application/zip]\n",
Expand Down Expand Up @@ -83,11 +83,11 @@
"# download the text8 corpus (a 100 MB sample of cleaned wikipedia text)\n",
"import os.path\n",
"if not os.path.isfile('text8'):\n",
" !wget -c http://mattmahoney.net/dc/text8.zip\n",
" !wget -c https://mattmahoney.net/dc/text8.zip\n",
" !unzip text8.zip\n",
"# download and preprocess the text9 corpus\n",
"if not os.path.isfile('text9'):\n",
" !wget -c http://mattmahoney.net/dc/enwik9.zip\n",
" !wget -c https://mattmahoney.net/dc/enwik9.zip\n",
" !unzip enwik9.zip\n",
" !perl {FT_HOME}wikifil.pl enwik9 > text9"
]
Expand Down
2 changes: 1 addition & 1 deletion docs/notebooks/Wordrank_comparisons.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"# download the text8 corpus (a 100 MB sample of preprocessed wikipedia text)\n",
"import os.path\n",
"if not os.path.isfile('text8'):\n",
" !wget -c http://mattmahoney.net/dc/text8.zip\n",
" !wget -c https://mattmahoney.net/dc/text8.zip\n",
" !unzip text8.zip"
]
},
Expand Down
2 changes: 1 addition & 1 deletion docs/notebooks/downloader_api_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@
" \"parts\": 3\n",
" }, \n",
" \"text8\": {\n",
" \"source\": \"http://mattmahoney.net/dc/text8.zip\", \n",
" \"source\": \"https://mattmahoney.net/dc/text8.zip\", \n",
" \"checksum\": \"68799af40b6bda07dfa47a32612e5364\", \n",
" \"parts\": 1, \n",
" \"description\": \"Cleaned small sample from wikipedia\", \n",
Expand Down
4 changes: 2 additions & 2 deletions docs/notebooks/nmslibtutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"--2019-06-27 13:48:42-- http://mattmahoney.net/dc/text8.zip\n",
"--2019-06-27 13:48:42-- https://mattmahoney.net/dc/text8.zip\n",
"Resolving mattmahoney.net... 67.195.197.75\n",
"Connecting to mattmahoney.net|67.195.197.75|:80... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
Expand All @@ -106,7 +106,7 @@
"source": [
"import os.path\n",
"if not os.path.isfile('text8'):\n",
" !wget -c http://mattmahoney.net/dc/text8.zip\n",
" !wget -c https://mattmahoney.net/dc/text8.zip\n",
" !unzip text8.zip"
]
},
Expand Down
2 changes: 1 addition & 1 deletion docs/src/auto_examples/howtos/run_downloader_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ Here's how to list all resources available in gensim-data:
"checksum": "68799af40b6bda07dfa47a32612e5364",
"file_name": "text8.gz",
"read_more": [
"http://mattmahoney.net/dc/textdata.html"
"https://mattmahoney.net/dc/textdata.html"
],
"parts": 1
},
Expand Down
2 changes: 1 addition & 1 deletion gensim/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def info(name=None, show_only_latest=True, name_only=False):
u'description': u'Cleaned small sample from wikipedia',
u'file_name': u'text8.gz',
u'parts': 1,
u'source': u'http://mattmahoney.net/dc/text8.zip'}
u'source': u'https://mattmahoney.net/dc/text8.zip'}
>>>
>>> api.info() # retrieve information about all available datasets and models
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -2041,7 +2041,7 @@ def __iter__(self):

class Text8Corpus:
def __init__(self, fname, max_sentence_length=MAX_WORDS_IN_BATCH):
"""Iterate over sentences from the "text8" corpus, unzipped from http://mattmahoney.net/dc/text8.zip."""
"""Iterate over sentences from the "text8" corpus, unzipped from https://mattmahoney.net/dc/text8.zip."""
self.fname = fname
self.max_sentence_length = max_sentence_length

Expand Down
2 changes: 1 addition & 1 deletion gensim/scripts/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@
print(globals()['__doc__'] % locals())
sys.exit(1)

corpus = Text8Corpus(sys.argv[1]) # text8/text9 format from http://mattmahoney.net/dc/textdata.html
corpus = Text8Corpus(sys.argv[1]) # text8/text9 format from https://mattmahoney.net/dc/textdata.html
cls = FastText
cls(corpus, workers=12, epochs=1).save(f'/tmp/{cls.__name__}.gensim{__version__}')

0 comments on commit 4ca0653

Please sign in to comment.