forked from DocNow/twarc-csv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_twarc_csv.py
145 lines (83 loc) · 2.74 KB
/
test_twarc_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import pandas
import pathlib
import twarc_csv
from click.testing import CliRunner
# Shared Click test runner; every test below invokes the twarc_csv command through it.
runner = CliRunner()
# Directory holding the .jsonl fixtures (a relative path, so it is resolved
# against the working directory the tests are started from).
test_data = pathlib.Path("test-data")
def _process_file(fname, expected=None, extra=""):
    """Run the ``twarc_csv.csv`` command on a fixture and check the CSV it writes.

    The input is ``test_data/<fname>.jsonl`` and the output is written next to
    it as ``test_data/<fname>.csv``. Any stale output is removed before the
    run, and the output is removed again afterwards even when an assertion
    fails, so a failing test cannot leave files behind for later runs.

    Args:
        fname: Fixture base name (without extension) inside ``test_data``.
        expected: Exact number of rows the CSV must contain. When ``None``
            the CSV only has to contain at least one row.
        extra: Additional command-line text appended verbatim after the two
            file arguments, so it must start with a space when non-empty.

    Raises:
        AssertionError: If no output file is produced or a check fails.
    """
    input_file = test_data / f"{fname}.jsonl"
    output_file = test_data / f"{fname}.csv"

    # Start from a clean slate so the existence check below is meaningful.
    if output_file.is_file():
        output_file.unlink()

    try:
        result = runner.invoke(
            twarc_csv.csv, f"{input_file} {output_file}{extra}"
        )
        # Surface what the command printed/raised; otherwise a missing file
        # gives no hint about why the conversion failed.
        assert output_file.is_file(), (
            f"no CSV written for {input_file}: "
            f"exception={result.exception!r}, output={result.output!r}"
        )

        df = pandas.read_csv(output_file)
        if expected is not None:
            assert len(df) == expected
        else:
            assert len(df) > 0

        # Runs whose options mention "counts" are not checked for an "id" column.
        if "counts" not in extra:
            assert isinstance(df["id"], pandas.Series)
    finally:
        # Always clean up, including when one of the assertions above failed.
        if output_file.is_file():
            output_file.unlink()
def test_empty():
    """Running the command on the ``empty.jsonl`` fixture must not create a CSV."""
    input_file = test_data / "empty.jsonl"
    output_file = test_data / "empty.csv"

    # A leftover file from an earlier run would make the final check fail
    # for the wrong reason, so remove it before invoking the command.
    if output_file.is_file():
        output_file.unlink()

    runner.invoke(twarc_csv.csv, [str(input_file), str(output_file)])
    assert not output_file.is_file()
def test_noflat():
    """Convert the ``noflat`` fixture and require a non-empty CSV."""
    _process_file(fname="noflat")
def test_flat():
    """Convert the ``flat`` fixture and require a non-empty CSV."""
    _process_file(fname="flat")
def test_expected_tweets():
    """Convert ``two_tweets`` with ``--inline-referenced-tweets``; expect 2 rows."""
    cli_options = " --inline-referenced-tweets"
    _process_file(fname="two_tweets", expected=2, extra=cli_options)
def test_2sets():
    """Convert the ``2sets`` fixture and require a non-empty CSV."""
    _process_file(fname="2sets")
def test_brexit():
    """Convert the ``brexit`` fixture and require a non-empty CSV."""
    _process_file(fname="brexit")
def test_kpop():
    """Convert the ``kpop`` fixture and require a non-empty CSV."""
    _process_file(fname="kpop")
def test_streaming_output_with_error():
    """Convert the ``streaming_output_with_error`` fixture; require a non-empty CSV."""
    _process_file(fname="streaming_output_with_error")
def test_withheld():
    """Convert the ``withheld`` fixture and require a non-empty CSV."""
    _process_file(fname="withheld")
def test_withheld2():
    """Convert the ``media_policy_violation_on`` fixture; require a non-empty CSV."""
    _process_file(fname="media_policy_violation_on")
def test_withheld3():
    """Convert the ``media_policy_violation_off`` fixture; require a non-empty CSV."""
    _process_file(fname="media_policy_violation_off")
def test_users():
    """Convert ``users`` with ``--input-data-type users``; expect 91 rows."""
    cli_options = " --input-data-type users"
    _process_file(fname="users", expected=91, extra=cli_options)
def test_compliance_users():
    """Convert ``users_compliance`` with ``--input-data-type compliance``; expect 1 row."""
    cli_options = " --input-data-type compliance"
    _process_file(fname="users_compliance", expected=1, extra=cli_options)
def test_compliance_tweets():
    """Convert ``tweets_compliance`` with ``--input-data-type compliance``; expect 2 rows."""
    cli_options = " --input-data-type compliance"
    _process_file(fname="tweets_compliance", expected=2, extra=cli_options)
def test_counts():
    """Convert ``counts`` with ``--input-data-type counts``; expect 169 rows.

    Because the options contain "counts", the helper does not check for an
    ``id`` column in this run.
    """
    cli_options = " --input-data-type counts"
    _process_file(fname="counts", expected=169, extra=cli_options)
def test_lists():
    """Convert ``lists`` with ``--input-data-type lists``; expect 6 rows."""
    cli_options = " --input-data-type lists"
    _process_file(fname="lists", expected=6, extra=cli_options)
def test_geo():
    """Convert ``geo_tweets`` with ``--input-data-type tweets``; expect 2 rows."""
    cli_options = " --input-data-type tweets"
    _process_file(fname="geo_tweets", expected=2, extra=cli_options)
def test_cotweet():
    """Convert the ``cotweet`` fixture and require a non-empty CSV."""
    _process_file(fname="cotweet")
def test_edited():
    """Convert the ``edited`` fixture and require a non-empty CSV."""
    _process_file(fname="edited")
def test_edited_after():
    """Convert the ``edited_after`` fixture and require a non-empty CSV."""
    _process_file(fname="edited_after")
def test_edited_before():
    """Convert the ``edited_before`` fixture and require a non-empty CSV."""
    _process_file(fname="edited_before")
def test_quoted_edit():
    """Convert the ``quoted_edit`` fixture and require a non-empty CSV."""
    _process_file(fname="quoted_edit")
def test_cashtags():
    """Convert the ``cashtags`` fixture and require a non-empty CSV."""
    _process_file(fname="cashtags")
def test_media_tweet():
    """Convert the ``media_tweet`` fixture and require a non-empty CSV."""
    _process_file(fname="media_tweet")
def test_mentions():
    """Convert the ``mentions_tweet`` fixture and require a non-empty CSV."""
    _process_file(fname="mentions_tweet")
def test_many_urls():
    """Convert the ``many_urls`` fixture and require a non-empty CSV."""
    _process_file(fname="many_urls")
def test_verified_type():
    """Convert the ``verified_type`` fixture and require a non-empty CSV."""
    _process_file(fname="verified_type")
def test_missing_entities():
    """Convert the ``entities_test`` fixture and require a non-empty CSV."""
    _process_file(fname="entities_test")