Skip to content

Commit

Permalink
Merge pull request #14 from qwqcode/dev
Browse files Browse the repository at this point in the history
Generate Form by Spider class or field specified Attributes in only o…
  • Loading branch information
qwqcode committed May 18, 2019
2 parents a373951 + 069bafb commit 253d8cb
Show file tree
Hide file tree
Showing 17 changed files with 274 additions and 167 deletions.
16 changes: 8 additions & 8 deletions Nacollector/Browser/CrBrowser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public class CrBrowser
private MainForm _form;
private ChromiumWebBrowser browser;
private DownloadManager downloadManager;
/// <summary>
/// Tells whether <paramref name="url"/> begins with one of the application's own URL prefixes.
/// </summary>
/// <param name="url">URL to test; null or empty is treated as "not an app URL".</param>
/// <returns>
/// <c>true</c> when <paramref name="url"/> starts with <c>https://127.0.0.1</c> or
/// <c>nacollector:https://</c>; otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// The prefixes are machine-readable identifiers, so they are compared ordinally instead of with
/// the current culture, and only the start of the string is inspected.
/// NOTE(review): this is a pure prefix match, so a URL such as
/// <c>https://127.0.0.1.example.com/</c> also passes. Confirm whether callers rely on this as a
/// trust check; if so, compare the parsed host instead.
/// </remarks>
public bool CheckIsAppUrl(string url)
{
    // A null/empty URL used to throw (null) or fall through to false (empty); report false for both.
    if (string.IsNullOrEmpty(url))
    {
        return false;
    }

    return url.StartsWith("https://127.0.0.1", System.StringComparison.Ordinal)
        || url.StartsWith("nacollector:https://", System.StringComparison.Ordinal);
}

public CrBrowser(MainForm form, string address)
{
Expand Down Expand Up @@ -48,8 +49,9 @@ public CrBrowser(MainForm form, string address)
browser.LoadHandler = new LoadHandler();
browser.DragHandler = new DragDropHandler();

browser.FrameLoadEnd += new EventHandler<FrameLoadEndEventArgs>(Browser_onFrameLoadEnd);
browser.IsBrowserInitializedChanged += new EventHandler<IsBrowserInitializedChangedEventArgs>(Browser_onIsBrowserInitializedChanged);
browser.FrameLoadStart += Browser_FrameLoadStart;
browser.FrameLoadEnd += Browser_onFrameLoadEnd;
browser.IsBrowserInitializedChanged += Browser_onIsBrowserInitializedChanged;

// 向前端暴露 C# 函数
CefSharpSettings.LegacyJavascriptBindingEnabled = true; // Need Update: https://github.com/cefsharp/CefSharp/issues/2246
Expand All @@ -59,19 +61,17 @@ public CrBrowser(MainForm form, string address)
downloadManager = new DownloadManager(this);
}

public bool CheckIsAppUrl(string url)
// Frame 开始加载时
private void Browser_FrameLoadStart(object sender, FrameLoadStartEventArgs e)
{
return url.IndexOf("https://127.0.0.1") == 0 || url.IndexOf("nacollector:https://") == 0;
string url = e.Frame.Url;
((DragDropHandler)browser.DragHandler).Enable = CheckIsAppUrl(url); // 开启 / 关闭拖拽功能
}

// Frame 加载完毕时执行
private void Browser_onFrameLoadEnd(object sender, FrameLoadEndEventArgs e)
{
string url = e.Frame.Url;
if (CheckIsAppUrl(url))
{
((DragDropHandler)browser.DragHandler).Enable = true; // 开启拖拽功能
}
}

// 浏览器初始化完毕时执行
Expand Down
8 changes: 7 additions & 1 deletion Nacollector/MainForm.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@ private void InitBrowser()
string url = e.Frame.Url;
if (crBrowser.CheckIsAppUrl(url))
{
crBrowser.RunJS(NacollectorUtils.GenFormList.GetCode());
// 获取并前端执行表单生成代码
_mainForm.BeginInvoke((MethodInvoker)delegate
{
var spiderDomain = taskRunner.GetLoadSpiderDomain();
crBrowser.RunJS(spiderDomain.GetFormGenJsCode());
taskRunner.UnloadSpiderDomain();
});
}
_splashScreen.Hide();
this.Invoke((MethodInvoker)delegate
Expand Down
6 changes: 6 additions & 0 deletions Nacollector/TaskManager/SpiderDomain.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using NacollectorUtils.Settings;
using System.Diagnostics;
using NacollectorSpiders;
using NacollectorSpiders.Lib;

namespace Nacollector.TaskManager
{
Expand Down Expand Up @@ -37,5 +38,10 @@ public void NewTask(string fullClassName, SpiderSettings settings, SpiderCallbac
Spider spider = (Spider)Activator.CreateInstance(tp);
spider.NewTask(settings, callback);
}

/// <summary>
/// Returns the form-generation JavaScript built by <c>SpiderIndex.BuildAllSpiderFormJsCode()</c>.
/// </summary>
/// <returns>The string produced by <c>SpiderIndex.BuildAllSpiderFormJsCode()</c>, passed through unchanged.</returns>
public string GetFormGenJsCode() => SpiderIndex.BuildAllSpiderFormJsCode();
}
}
45 changes: 32 additions & 13 deletions NacollectorSpiders/Business/CollItemDescImg.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,55 @@
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Web;
using NacollectorSpiders.Lib;

namespace NacollectorSpiders.Business
{
/// <summary>
/// 商品详情页图片解析
/// </summary>
[SpiderRegister(Label = "商品详情页图片解析")]
public class CollItemDescImg : Spider
{
// 参数
string PageUrl = "";
string PageType = "";
string ImgType = "";
string CollType = "";
[FormTextInput(Label = "详情页链接", Type = "textInput", Parms = "'', InputValidators.isUrl")]
public string PageUrl; // 不要使用 new Uri(),因为会把 urlencode 的参数自动 decode

[FormTextInput(Label = "链接类型", Type = "selectInput", Parms = @"{
'Tmall': '天猫',
'Taobao': '淘宝',
'Alibaba': '阿里巴巴',
'Suning': '苏宁易购',
'Gome': '国美在线'
}")]
public string PageType;

[FormTextInput(Label = "图片类型", Type = "selectInput", Parms = @"{
'Thumb': '主图',
'Category': '分类图',
'Desc': '详情图'
}")]
public string ImgType;

[FormTextInput(Label = "采集模式", Type = "selectInput", Parms = @"{
'collImgSrcUrl': '显示图片链接',
'collDownloadImgSrc': '显示图片链接 并 下载打包保存'
}")]
public string CollType;

// 页面内容
string pageContent;
private string pageContent;

// CsQuery Dom
CQ pageDom;
private CQ pageDom;

// 图片链接池
Dictionary<string, ArrayList> imgUrlPool = new Dictionary<string, ArrayList>();
private Dictionary<string, ArrayList> imgUrlPool = new Dictionary<string, ArrayList>();

public override void BeginWork()
{
base.BeginWork();

// 参数设定
PageUrl = GetParm("PageUrl"); // 若使用 new Uri() 会把 urlencode 的参数自动 decode
PageType = GetParm("PageType");
ImgType = GetParm("ImgType");
CollType = GetParm("CollType");

// 下载页面
LogInfo("开始下载:" + PageUrl);

Expand Down
10 changes: 5 additions & 5 deletions NacollectorSpiders/Business/CollItemDescVideo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,19 @@
using System.Threading.Tasks;
using System.Web;
using System.Windows.Forms;
using NacollectorSpiders.Lib;

namespace NacollectorSpiders.Business
{
/// <summary>
/// 详情页视频采集
/// 商品详情页视频抓取
/// </summary>
[SpiderRegister(Label = "商品详情页视频抓取")]
public class CollItemDescVideo : Spider
{
// 参数
string PageUrl = "";

// ts 池
List<string> tsLinkPool = new List<string>();
[FormTextInput(Label = "详情页链接", Type = "textInput", Parms = "'', InputValidators.isUrl")]
string PageUrl;

public override void BeginWork()
{
Expand Down
35 changes: 23 additions & 12 deletions NacollectorSpiders/Business/TaobaoSellerColl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,44 @@
using System.Threading.Tasks;
using System.Web;
using System.Windows.Forms;
using NacollectorSpiders.Lib;

namespace NacollectorSpiders.Business
{
/// <summary>
/// 淘宝店铺搜索卖家ID名采集
/// </summary>
[SpiderRegister(Label = "淘宝店铺搜索卖家ID名采集")]
public class TaobaoSellerColl : Spider
{
// 参数
string PageUrl = "";
int CollBeginPage = 0;
int CollEndPage = 0;
bool IgnoreTmall = false;
[FormTextInput(Label = "店铺搜索页链接", Type = "textInput", Parms = "'', InputValidators.isUrl")]
public string PageUrl;

[FormTextInput(Label = "采集开始页码", Type = "numberInput", Parms = "1, 1")]
public int CollBeginPage;

[FormTextInput(Label = "采集结束页码", Type = "numberInput", Parms = "undefined, 1")]
public int CollEndPage;

[FormTextInput(Label = "忽略天猫卖家", Type = "selectInput", Parms = @"{
'on': '开启',
'off': '关闭'
}")]
public string IgnoreTmall;

private bool _IgnoreTmall;

// 卖家名池
List<string> sellerNames = new List<string>();
private List<string> sellerNames = new List<string>();

public override void BeginWork()
{
base.BeginWork();

// 参数设定
PageUrl = GetParm("PageUrl").Trim();
bool CollBeginPageIsInt = Int32.TryParse(GetParm("CollBeginPage"), out CollBeginPage); // out 到 this.CollBeginPage 里 2333
if (!CollBeginPageIsInt) throw new Exception("参数 CollBeginPage 不是数字");
bool CollEndPageIsInt = Int32.TryParse(GetParm("CollEndPage"), out CollEndPage);
if (!CollEndPageIsInt) throw new Exception("参数 CollEndPage 不是数字");
if (CollBeginPage <= 0 || CollEndPage <= 0 || CollBeginPage > CollEndPage) throw new Exception("老铁,你输入的参数是什么鬼?");
IgnoreTmall = GetParm("IgnoreTmall").Trim().ToLower() == "on" ? true : false;
_IgnoreTmall = IgnoreTmall.Trim().ToLower() == "on" ? true : false;

for (int i = CollBeginPage; i <= CollEndPage; i++)
{
Expand Down Expand Up @@ -86,7 +97,7 @@ private void WorkOnPage(int page)
int addedCount = 0;
foreach (var item in items)
{
if (IgnoreTmall && item["isTmall"].ToString().Trim().ToLower() == "true")
if (_IgnoreTmall && item["isTmall"].ToString().Trim().ToLower() == "true")
continue;

var seller = item["nick"].ToString().Trim();
Expand Down
18 changes: 11 additions & 7 deletions NacollectorSpiders/Business/TmallGxptInvite.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,33 @@
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using NacollectorSpiders.Lib;

namespace NacollectorSpiders.Business
{
/// <summary>
/// 天猫供销平台分销商一键邀请
/// </summary>
[SpiderRegister(Label = "天猫供销平台分销商一键邀请")]
public class TmallGxptInvite : Spider
{
// 参数
string SellerId = "";
string[] SellerIdArr = null;
[FormTextInput(Label = "分销商ID名(一行一个)", Type = "textareaInput", Parms = "undefined, 250")]
public string SellerId;

string cookieStr = null;
string _tb_token_ = null;
private string[] SellerIdArr = null;

List<string> errorSeller = new List<string>(); // 未邀请成功的卖家
int maxErrorThreshold = 5; // 最多错误阈值
private string cookieStr = null;
private string _tb_token_ = null;

private List<string> errorSeller = new List<string>(); // 未邀请成功的卖家
private int maxErrorThreshold = 5; // 最多错误阈值

public override void BeginWork()
{
base.BeginWork();

// 参数设定
SellerId = GetParm("SellerId").Trim();
SellerIdArr = SellerId.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
if (SellerIdArr.Length <= 0) { throw new Exception("卖家ID不能一个也没有啊"); }
// 获取 Cookie
Expand Down
23 changes: 13 additions & 10 deletions NacollectorSpiders/Business/TmallGxptInviteDelete.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,35 @@
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using NacollectorSpiders.Lib;

namespace NacollectorSpiders.Business
{
/// <summary>
/// 天猫供销平台分销商一键撤回
/// </summary>
class TmallGxptInviteDelete : Spider
[SpiderRegister(Label = "天猫供销平台分销商一键撤回")]
public class TmallGxptInviteDelete : Spider
{
// 参数
int DeleteBeginPage = 0;
int DeleteEndPage = 0;
[FormTextInput(Label = "撤回开始页码", Type = "numberInput", Parms = "1, 1")]
public int DeleteBeginPage;

string cookieStr = null;
[FormTextInput(Label = "撤回结束页码", Type = "numberInput", Parms = "undefined, 1")]
public int DeleteEndPage;

List<string> errorSeller = new List<string>(); // 未撤回成功的卖家
int maxErrorThreshold = 5; // 最多错误阈值
private string cookieStr = null;

private List<string> errorSeller = new List<string>(); // 未撤回成功的卖家
private int maxErrorThreshold = 5; // 最多错误阈值

public override void BeginWork()
{
base.BeginWork();

// 参数设定
bool DeleteBeginPageIsInt = Int32.TryParse(GetParm("DeleteBeginPage"), out DeleteBeginPage);
if (!DeleteBeginPageIsInt) throw new Exception("参数 DeleteBeginPage 不是数字");
bool DeleteEndPageIsInt = Int32.TryParse(GetParm("DeleteEndPage"), out DeleteEndPage);
if (!DeleteEndPageIsInt) throw new Exception("参数 DeleteEndPage 不是数字");
if (DeleteBeginPage <= 0 || DeleteEndPage <= 0 || DeleteBeginPage > DeleteEndPage) throw new Exception("老铁,你输入的参数是什么鬼?");

// 获取 Cookie
var cgSettings = new NacollectorUtils.Settings.CookieGetterSettings
{
Expand Down
23 changes: 23 additions & 0 deletions NacollectorSpiders/Lib/FormTextInputAttribute.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace NacollectorSpiders.Lib
{
/// <summary>
/// Field-level attribute that carries the metadata of one form input:
/// a label, a required flag, an input type name and a parameter string.
/// </summary>
[AttributeUsage(AttributeTargets.Field)]
public class FormTextInputAttribute : Attribute
{
    /// <summary>
    /// Creates the attribute with its defaults: <see cref="Required"/> = <c>true</c>,
    /// <see cref="Type"/> = <c>"textInput"</c>, <see cref="Parms"/> = <c>""</c>.
    /// <see cref="Label"/> is left unset (<c>null</c>).
    /// </summary>
    public FormTextInputAttribute()
    {
        Required = true;
        Type = "textInput";
        Parms = "";
    }

    /// <summary>
    /// Field label.
    /// </summary>
    public string Label { get; set; }

    /// <summary>
    /// Required flag; defaults to <c>true</c>.
    /// </summary>
    public bool Required { get; set; }

    /// <summary>
    /// Input type name; defaults to <c>"textInput"</c>.
    /// NOTE(review): presumably the name of a front-end input builder - confirm against the form generator.
    /// </summary>
    public string Type { get; set; }

    /// <summary>
    /// Parameter string for the input; defaults to the empty string.
    /// NOTE(review): presumably spliced into the generated JavaScript as extra arguments - confirm against the form generator.
    /// </summary>
    public string Parms { get; set; }
}
}
Loading

0 comments on commit 253d8cb

Please sign in to comment.