NOTE(review): this patch was re-expanded from a copy whose line breaks and
indentation had been collapsed. The context-line indentation in the
opendatatools/__init__.py and rest_agent.py hunks is a reconstruction; confirm
it against the repository before applying. The "index" blob ids are carried
over from the original patch and do not reflect the revised file contents.

diff --git a/example/wechat_pubaccount.py b/example/wechat_pubaccount.py
new file mode 100644
index 0000000..ef711f3
--- /dev/null
+++ b/example/wechat_pubaccount.py
@@ -0,0 +1,15 @@
+
+from opendatatools import wechat
+
+if __name__ == "__main__":
+    result = wechat.login("PKUJohnson@163.com", "密码")
+    if result:
+        df, msg = wechat.search_pub("饭桶戴老板")
+        if df is not None:
+            for index, row in df.iterrows():
+                fakeid = row["fakeid"]
+                total_msg_cnt = wechat.get_total_msg_count(fakeid)
+                result = wechat.get_all_articles(fakeid, total_msg_cnt)
+                print(result)
+
+
diff --git a/opendatatools/__init__.py b/opendatatools/__init__.py
index b078e61..9fc3327 100644
--- a/opendatatools/__init__.py
+++ b/opendatatools/__init__.py
@@ -9,6 +9,6 @@
 
 import os
 
-__version__ = '0.9.9'
+__version__ = '1.0.0'
 
 SOURCE_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/opendatatools/common/rest_agent.py b/opendatatools/common/rest_agent.py
index fcb0272..eec4c9d 100644
--- a/opendatatools/common/rest_agent.py
+++ b/opendatatools/common/rest_agent.py
@@ -34,7 +34,7 @@ def do_request(self, url, param = None, method="GET", type="text", encoding = No
                 if json is not None:
                     res = self.session.post(url, json=json, **kwargs)
                 else:
-                    res = self.session.post(url, data=param **kwargs)
+                    res = self.session.post(url, data=param, **kwargs)
         else:
             if method == "GET":
                 res = self.session.get(url, params=param, proxies=self.proxies, **kwargs)
diff --git a/opendatatools/wechat/__init__.py b/opendatatools/wechat/__init__.py
new file mode 100644
index 0000000..44c3a04
--- /dev/null
+++ b/opendatatools/wechat/__init__.py
@@ -0,0 +1 @@
+from .wechat_interface import *
\ No newline at end of file
diff --git a/opendatatools/wechat/wechat_agent.py b/opendatatools/wechat/wechat_agent.py
new file mode 100644
index 0000000..01ff3a2
--- /dev/null
+++ b/opendatatools/wechat/wechat_agent.py
@@ -0,0 +1,175 @@
+"""Client for the WeChat Official Account web console (mp.weixin.qq.com)."""
+from opendatatools.common import RestAgent
+import hashlib
+import json
+from PIL import Image
+from io import BytesIO
+import random
+import time
+import urllib
+import urllib.parse
+import pandas as pd
+import math
+import datetime
+import threading
+import functools
+
+
+Host = "mp.weixin.qq.com"
+agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
+refer = "https://mp.weixin.qq.com"
+xrw = "XMLHttpRequest"
+
+# Articles requested per page; get_all_articles() advances by the same step.
+PAGE_SIZE = 5
+
+loginUrl = "https://mp.weixin.qq.com/cgi-bin/bizlogin?action=startlogin"
+qrcodeUrl = "https://mp.weixin.qq.com/cgi-bin/loginqrcode?action=getqrcode&param=4300&rd=120"
+checkLogin = "https://mp.weixin.qq.com/cgi-bin/loginqrcode?action=ask&f=json&ajax=1&random="
+doLogin = "https://mp.weixin.qq.com/cgi-bin/bizlogin?action=login"
+# The need* markers in the two templates below are substituted per request.
+searchUrl = "https://mp.weixin.qq.com/cgi-bin/searchbiz?action=search_biz&needToken&lang=zh_CN&f=json&ajax=1&needRandom&needQuery&begin=0&count=5"
+# type was originally 9; changing it to 1 returns more entries.
+appmsg = "https://mp.weixin.qq.com/cgi-bin/appmsg?needToken&lang=zh_CN&f=json&ajax=1&needRandom&action=list_ex&needBegin&needCount&query=&needFakeid&type=9"
+
+
+def md5(data):
+    """Return the hex MD5 digest of the UTF-8 encoding of ``data``."""
+    m = hashlib.md5()
+    m.update(data.encode(encoding='UTF-8'))
+    return m.hexdigest()
+
+
+def ReqRandom():
+    """Return a random fraction in [0, 1) as a plain decimal string.
+
+    Fixed-point formatting is used because str() of a large float switches to
+    exponent notation, which would not yield a clean "0.<digits>" value.
+    """
+    return "{:.17f}".format(random.random())
+
+
+class WechatMPAgent(RestAgent):
+    """Session-based agent for logging in to and querying mp.weixin.qq.com."""
+
+    def __init__(self):
+        RestAgent.__init__(self)
+
+    def login(self, username, password):
+        """Log in with account credentials plus a QR-code confirmation.
+
+        Posts the credentials, shows the login QR code, polls until the check
+        endpoint reports status 1, then stores the session token in
+        ``self.token``. This call blocks until the QR code is confirmed.
+
+        Returns True on success, or False when the server reports an error
+        for the credentials or no token is present in the final redirect URL.
+        """
+        param = {
+            "username": username,
+            "pwd": md5(password),
+            "imgcode": "",
+            "f": "json"
+        }
+
+        # Build the Referer from the account being logged in, not a fixed one.
+        referer = ("https://mp.weixin.qq.com/cgi-bin/bizlogin?action=validate"
+                   "&lang=zh_CN&account=" + urllib.parse.quote(username, safe="@"))
+        self.add_headers({
+            "Referer" : referer,
+            "Host" : Host,
+            "X-Requested-With" : xrw
+        })
+
+        resp = self.do_request(loginUrl, method="POST", param=param)
+        print(resp)
+        ret, err_msg = self.get_retcode(json.loads(resp))
+        if ret != 0:
+            print(err_msg)
+            return False
+
+        response = self.do_request(qrcodeUrl, method="GET", type="binary")
+        qrcode = Image.open(BytesIO(response))
+        qrcode.show()
+
+        # Poll every 2 seconds until the QR code is reported as verified.
+        while True:
+            response = self.do_request(checkLogin+ReqRandom(), method="GET")
+            rsp = json.loads(response)
+
+            if rsp["status"] == 1:
+                qrcode.close()
+                break
+            else:
+                time.sleep(2)
+
+        param = {
+            "userlang": "zh_CN",
+            "token": "",
+            "lang": "zh_CN",
+            "f" : "json",
+            "ajax" : "1",
+        }
+
+        response = self.do_request(doLogin, method="POST", param=param)
+        rsp = json.loads(response)
+        print(rsp)
+        # The token is a query parameter of the redirect URL; stop at the next
+        # "&" so that any parameters following it are not swallowed.
+        _, found, tail = rsp.get("redirect_url", "").partition("token=")
+        if not found:
+            return False
+        self.token = tail.split("&", 1)[0]
+        return True
+
+    def get_retcode(self, rsp):
+        """Return ``(ret, err_msg)`` from ``rsp["base_resp"]``, or ``(0, "ok")`` if absent."""
+        if "base_resp" in rsp and "ret" in rsp["base_resp"] and "err_msg" in rsp["base_resp"]:
+            return rsp["base_resp"]["ret"], rsp["base_resp"]["err_msg"]
+        return 0, "ok"
+
+    def search_pub(self, pubno):
+        """Search official accounts matching ``pubno``.
+
+        Returns ``(DataFrame, "")`` on success or ``(None, err_msg)`` when the
+        response carries a non-zero return code. Requires a prior login().
+        """
+        # Percent-encode the query so characters such as "&" cannot break the URL.
+        _searchUrl = (searchUrl.replace("needToken", "token=" + self.token)
+                      .replace("needRandom", "random=" + ReqRandom())
+                      .replace("needQuery", "query=" + urllib.parse.quote(pubno)))
+        response = self.do_request(_searchUrl, method="GET")
+        rsp = json.loads(response)
+        ret, err_msg = self.get_retcode(rsp)
+        if ret != 0:
+            return None, err_msg
+        df = pd.DataFrame(rsp["list"])
+        return df, ""
+
+    def get_article_list(self, fakeid, begin):
+        """Fetch one page of articles of account ``fakeid`` starting at ``begin``.
+
+        Retries every 60 seconds for as long as the response message is not
+        "ok". Returns ``(app_msg_cnt, DataFrame)``; requires a prior login().
+        """
+        count = PAGE_SIZE
+        _appmsg = appmsg.replace("needToken", "token=" + self.token).replace("needRandom", "random=" + ReqRandom()).replace("needFakeid", "fakeid=" + fakeid)
+        appmsgTemp = _appmsg.replace("needBegin", "begin=" + str(begin)).replace("needCount", "count=" + str(count))
+
+        response = self.do_request(appmsgTemp, method="GET")
+        rsp = json.loads(response)
+        ret, msg = self.get_retcode(rsp)
+        # On failure, wait 60 seconds and try again.
+        while msg != "ok":
+            # do_request() returns the body as a string here, so there is no .text.
+            print(response)
+            time.sleep(60)
+            response = self.do_request(appmsgTemp, method="GET")
+            rsp = json.loads(response)
+            ret, msg = self.get_retcode(rsp)
+
+        app_msg_cnt = rsp["app_msg_cnt"]
+        app_msg_list = rsp["app_msg_list"]
+        df = pd.DataFrame(app_msg_list)
+        return app_msg_cnt, df
+
diff --git a/opendatatools/wechat/wechat_interface.py b/opendatatools/wechat/wechat_interface.py
new file mode 100644
index 0000000..a065a80
--- /dev/null
+++ b/opendatatools/wechat/wechat_interface.py
@@ -0,0 +1,48 @@
+"""Module-level convenience functions around a shared WechatMPAgent."""
+import time
+
+import pandas as pd
+
+from .wechat_agent import *
+
+# NOTE: this rebinds ``agent``, which the star import above bound to the
+# User-Agent string defined in wechat_agent.
+agent = WechatMPAgent()
+
+
+def login(mp_username, mp_password):
+    """Log the shared agent in; returns the result of ``agent.login``."""
+    return agent.login(mp_username, mp_password)
+
+
+def search_pub(pubaccount):
+    """Search official accounts; returns ``(DataFrame or None, message)``."""
+    return agent.search_pub(pubaccount)
+
+
+def get_total_msg_count(fakeid):
+    """Return the ``app_msg_cnt`` value reported for account ``fakeid``."""
+    msg_cnt, df = agent.get_article_list(fakeid, 0)
+    return msg_cnt
+
+
+def get_all_articles(fakeid, app_msg_cnt):
+    """Fetch ``app_msg_cnt`` articles of ``fakeid`` page by page.
+
+    Sleeps 5 seconds after each page. Returns the pages concatenated into one
+    DataFrame, or an empty DataFrame when ``app_msg_cnt`` is not positive.
+    """
+    count = 0
+    df_list = []
+    while count < app_msg_cnt:
+        cnt, df = agent.get_article_list(fakeid, count)
+        df_list.append(df)
+        count = count + PAGE_SIZE
+        print(count)
+        time.sleep(5)
+
+    # pd.concat() raises ValueError on an empty list, so handle "no pages" here.
+    if not df_list:
+        return pd.DataFrame()
+    result = pd.concat(df_list)
+    return result