[middleware] return HtmlResponse to spider

scrapy-plugins · pawelmhm · Apr 3, 2015 · Apr 3, 2015 · Apr 3, 2015 · May 26, 2015
commit 9efd145d0b7c6b7d5e743948ecbb124aa2b9def5
diff --git a/scrapyjs/middleware.py b/scrapyjs/middleware.py
@@ -6,6 +6,7 @@
 from scrapy.exceptions import NotConfigured
 
 from scrapy import log
+from scrapy.http.response.html import HtmlResponse
 from scrapy.http.headers import Headers
 
 
@@ -107,20 +108,31 @@ def process_request(self, request, spider):
  # are not respected.
  headers=Headers({'Content-Type': 'application/json'}),
  )
-
  self.crawler.stats.inc_value('splash/%s/request_count' % endpoint)
  return req_rep
 
  def process_response(self, request, response, spider):
- splash_options = request.meta.get("_splash_processed")
+ splash_options = self.get_splash_options(request, spider)  # a falsy result skips the stats update guarded by the `if` below
  if splash_options:
  endpoint = splash_options['endpoint']
  self.crawler.stats.inc_value(
  'splash/%s/response_count/%s' % (endpoint, response.status)
  )
-
+ response = self.html_response(response, request)  # replaces the response with html_response()'s result; NOTE(review): indentation is lost in this diff view -- confirm whether this line sits inside the `if`
  return response
 
+ def html_response(self, response, request):
+     """Convert a JSON response carrying rendered HTML into an HtmlResponse.
+
+     The body of ``response`` is parsed as JSON. When it is an object
+     with non-empty ``"html"`` and ``"url"`` entries, a new
+     ``HtmlResponse`` is built from them, keeping the original status
+     and attaching ``request``. In every other case ``response`` is
+     returned unchanged.
+
+     :param response: response whose body may be a JSON document
+     :param request: request to attach to the returned HtmlResponse
+     :returns: an ``HtmlResponse`` or the original ``response``
+     """
+     try:
+         data = json.loads(response.body)
+     except ValueError:
+         # The body is not valid JSON, so there is nothing to unwrap.
+         return response
+
+     # Valid JSON is not necessarily an object; only a dict has .get().
+     if not isinstance(data, dict):
+         return response
+
+     html = data.get("html")
+     url = data.get("url")
+     if not html or not url:
+         # Without both pieces an HtmlResponse cannot be built.
+         return response
+
+     return HtmlResponse(url, body=html, encoding='utf8',
+                         status=response.status, request=request)
+
  def _set_download_slot(self, request, meta, slot_policy):
  if slot_policy == SlotPolicy.PER_DOMAIN:
  # Use the same download slot to (sort of) respect download