How do I properly test a Scrapy spider Python generator function?

How do I properly test a Scrapy spider Python generator function? - python

I have a Scrapy XMLFeedSpider and I'm trying to test the following parse_node function:
def parse_node(self, response, selector):
    """Yield a request for the node's article unless it predates the cutoff.

    Reads the ``pubDate`` and ``link`` text from ``selector``. When the date
    compares lower than ``self.cutoff_date`` the article is only logged;
    otherwise a ``scrapy.Request`` for the link is yielded with
    ``self.parse_post`` as its callback.

    This is a generator function (its body contains ``yield``).
    """
    date = selector.xpath('pubDate/text()').extract_first()
    url = selector.xpath('link/text()').extract_first()
    if date < self.cutoff_date:  # TEST VALIDITY OF THE DATE
        print("Invalid date")
        self.log("Article %s before crawler start date" % url)
    else:
        print("Valid date")
        yield scrapy.Request(url, self.parse_post)
I'm trying to test the function for both a valid and an invalid date:
@mock.patch('my_spiders.spiders.myspider.scrapy.Request')
def test_parse_node(self, scrapy_request):
    """A node dated at the spider's start date yields the mocked request."""
    scrapy_request.return_value = mock.MagicMock()
    self.spider.log = mock.MagicMock()
    mock_response = mock.MagicMock()
    mock_selector = mock.MagicMock()
    date = self.spider.start_date.strftime("%c")
    url = "https://google.com"
    # extract_first() is called twice: once for the date, then for the link.
    mock_selector.xpath.return_value.extract_first = mock.MagicMock(
        side_effect=[date, url]
    )
    parsed_node = self.spider.parse_node(mock_response, mock_selector)
    self.assertEqual(tuple(parsed_node)[0], scrapy_request.return_value)
    self.spider.log.assert_not_called()
    scrapy_request.assert_called_once_with(url, self.spider.parse_post)
@mock.patch('my_spiders.spiders.myspider.scrapy.Request')
def test_parse_node_invalid_date(self, scrapy_request):
    """A node dated one day before the spider's start date yields no request."""
    scrapy_request.return_value = mock.MagicMock()
    self.spider.log = mock.MagicMock()
    mock_response = mock.MagicMock()
    mock_selector = mock.MagicMock()
    date_object = self.spider.start_date - datetime.timedelta(days=1)
    date = date_object.strftime("%c")
    url = "https://google.com"
    # extract_first() is called twice: once for the date, then for the link.
    mock_selector.xpath.return_value.extract_first = mock.MagicMock(
        side_effect=[date, url]
    )
    parsed_node = self.spider.parse_node(mock_response, mock_selector)
    # TODO: figure out why this doesn't work
    # self.spider.log.assert_called_once()
    scrapy_request.assert_not_called()
The first test, test_parse_node runs as expected. The problem is with the test_parse_node_invalid_date function. If I put a debugger in the parse_node function it doesn't get called. The print functions don't get called either.
I suspect this is some kind of issue with the yield statement/generator, but can't figure out what's happening. Why isn't the second test running through the parse_node function as I'd expect it would?

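Calling a Python generator function does not run its body; it simply returns a generator (an iterator), and the body only executes as that generator is iterated. That is why neither the debugger nor the print statements were hit in the second test: nothing ever consumed the generator. To actually debug it, I had to start the iteration by invoking the next() method: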
parsed_node = self.spider.parse_node(mock_response, mock_selector).next()
I also had to make sure that each test instantiated a new generator, because a generator can only be iterated over one time.
Then I could step through and debug/complete my test as necessary.

Related

Mock.patch returning MagicMock object causing AssertionError?

I have a function that I am trying to test in querySomething.py:
class QuerySomething:
    """Runs one query per configured issue type (only ``retrieveIssues`` is shown here)."""

    def retrieveIssues(self, token):
        """Run one query per configured issue type and collect the responses.

        Args:
            token: Passed through to ``httpClient.get_request`` with each query.

        Returns:
            A list holding one ``httpClient.get_request`` result per entry of
            ``self.issueTypes``, in iteration order.
        """
        # Custom fields only apply when the event asks for them and some are
        # configured; every other combination falls back to an empty list.
        if "customFields" in self._event and self.custom_fields:
            fields = self.custom_fields
        else:
            fields = []

        responses = []
        for issueTypeKey, issueTypeValue in self.issueTypes.items():
            print(issueTypeKey, ":", issueTypeValue)
            query = self.getQuery(issueTypeValue, self.status, fields)
            responses.append(httpClient.get_request(query, token))
        return responses
And the test file:
def mock_getQuery(*args, **kwargs):
    """Stand-in for ``QuerySomething.getQuery`` that always returns ``"QUERY"``.

    Accepts and ignores any arguments: when used as a mock ``side_effect`` it
    is called with whatever arguments the patched ``getQuery`` receives.
    """
    return "QUERY"
def mock_response(state):
    """Return the canned API response for ``state`` as a JSON-encoded string.

    Args:
        state: ``"unauth"`` or ``"auth"``; selects which fixture file is read.

    Returns:
        ``json.dumps`` applied to the text of the selected fixture file, or
        the plain string ``"No message"`` for any other ``state``.
    """
    if state == "unauth":
        path = "src/tests/mockdata/unauthorized_api_response.json"
    elif state == "auth":
        path = "src/tests/mockdata/success_api_response.json"
    else:
        return "No message"
    # JSON fixtures are read as UTF-8 regardless of the platform default.
    with open(path, "r", encoding="utf-8") as response_file:
        return json.dumps(response_file.read())
class test_query(unittest.TestCase):
    """Tests for ``QuerySomething.retrieveIssues`` with the HTTP layer patched out."""

    # patch decorators are applied bottom-up: the first mock argument is the
    # patched httpClient.get_request, the second is the patched getQuery.
    @mock.patch("querySomething.QuerySomething.getQuery", side_effect=mock_getQuery)
    @mock.patch("httpClient.get_request", side_effect=mock_response)
    def test_retreiveIssues_unauth_response(self, mock_get, QuerySomething):
        self.assertEqual(QuerySomething.retrieveIssues("token"), mock_response("unauth"))


if __name__ == "__main__":
    unittest.main()
I am trying to mock the httpClient.get_request so that it gets the JSON file instead of reaching out to the API. We want to test an unauthorized response and a success response which explains the mock_response function. However, when I run the test, I get the following:
AssertionError: <MagicMock name='getQuery.retri[36 chars]712'> != '"{\\n \\"errorMessages\\": [\\n [131 chars]\n}"'
which is somewhat correct, but we need just the text, not the object. I read that I need to call the function, but when I try to call the function it throws a ModuleNotFound or NotAPackage error. What do I need to do to mock the httpClient.get_request and return the JSON string in the retrieveIssues function?

Updated, I was able to pull the JSON from the other file, and then was able to mock the return value as follows:
QuerySomething.retrieveIssues.return_value=load_json("unauth")
where load_json("unauth") pulls from the JSON response file.

Flask loop takes long time to complete

I have this loop in my app.py. For some reason it extends the load time by over 3 seconds. Are there any solutions?
import dateutil.parser as dp
# Converts date from ISO-8601 string to formatted string and returns it
def dateConvert(date):
    """Parse ``date`` with ``dp.parse`` and format it as ``"%H:%M # %e/%b/%y"``."""
    return dp.parse(date).strftime("%H:%M # %e/%b/%y")
def nameFromID(userID):
    """Return ``"<firstName> <lastName>"`` for ``userID``, or ``'Unknown'`` for ``None``.

    Performs one HTTP GET per call against ``https://example2.org/<userID>``
    using the module-level ``headers``.
    """
    if userID is None:
        return 'Unknown'
    else:
        response = requests.get("https://example2.org/" + str(userID), headers=headers)
        return response.json()['firstName'] + ' ' + response.json()['lastName']
# Build one [member name, formatted creation date] pair per returned entry.
logs = []
response = requests.get("https://example.org", headers=headers)
for response in response.json():
    logs.append([nameFromID(response['member']), dateConvert(response['createdAt'])])

It extends the load time by over 3 seconds because it does a lot of unnecessary work, that's why.
You're not using requests Sessions. Each request will require creating and tearing down an HTTPS connection. That's slow.
You're doing another HTTPS request for each name conversion. (See above.)
You're parsing the JSON you get in that function twice.
Whatever dp.parse() is (dateutil?), it's probably doing a lot of extra work parsing from a free-form string. If you know the input format, use strptime.
Here's a rework that should be significantly faster. Please see the TODO points first, of course.
Also, if you know that the member id -> name mapping doesn't change, you can make name_cache a suitably named global variable too (but remember that it will then persist between requests).
import datetime
import requests
INPUT_DATE_FORMAT = "TODO_FILL_ME_IN" # TODO: FILL ME IN.
def dateConvert(date: str) -> str:
    """Reformat ``date`` from ``INPUT_DATE_FORMAT`` to ``"%H:%M # %e/%b/%y"``.

    Raises:
        ValueError: If ``date`` does not match ``INPUT_DATE_FORMAT``.
    """
    parsed = datetime.datetime.strptime(date, INPUT_DATE_FORMAT)
    return parsed.strftime("%H:%M # %e/%b/%y")
def nameFromID(sess: "requests.Session", userID):
    """Look up the display name for ``userID`` through the shared session.

    Args:
        sess: Session used for the request, so the connection is reused.
        userID: Member id, or ``None`` when the member is not known.

    Returns:
        ``"<firstName> <lastName>"`` from the JSON response, or ``"Unknown"``
        when ``userID`` is ``None`` (no request is made in that case).

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status.
    """
    if userID is None:
        return "Unknown"
    response = sess.get(f"https://example2.org/{userID}")
    response.raise_for_status()
    # Decode the body once and reuse it for both fields.
    data = response.json()
    return "{firstName} {lastName}".format_map(data)
def do_thing():
    """Fetch the log entries and return them as ``[name, formatted_date]`` pairs.

    All requests go through a single ``requests.Session`` so the HTTPS
    connection is reused, and each member id is resolved to a name at most
    once per call.

    Returns:
        A list with one ``[name, formatted date]`` pair per entry returned by
        ``https://example.org``, in response order.

    Raises:
        Whatever ``raise_for_status()`` raises if a request returns an error
        status.
    """
    headers = {}  # TODO: fill me in
    # Member id -> display name; avoids repeating a lookup for the same member.
    name_cache = {}
    logs = []
    with requests.Session() as sess:
        sess.headers.update(headers)
        response = sess.get("https://example.org")
        # Fail loudly on an error status instead of parsing an error body.
        response.raise_for_status()
        for entry in response.json():
            member_id = entry["member"]
            if member_id not in name_cache:
                name_cache[member_id] = nameFromID(sess, member_id)
            logs.append([name_cache[member_id], dateConvert(entry["createdAt"])])
    return logs

Python Mock: Result of method call is __getitem__ instead of actual value

I have a unit test and mocking an external call. Here's the abbreviated code for the function I'm trying to test in service.py file
def post_data(**kwargs):
    """Post ``kwargs['data']`` and, on success, track the run to completion.

    Returns:
        The result of ``track_run_to_completion`` when the post response
        reports ``'SUCCESS'`` with non-empty data; otherwise the post
        response itself.
    """
    # NOTE(review): `payload` and `url` are not defined in this abbreviated
    # snippet; presumably they are built earlier in the real function.
    req = request.Request()
    response = req.post(payload, url, json.dumps({"data": kwargs['data']}))
    if response['request']['status'] == 'SUCCESS' and response['data']:
        run_id = response.json()['data']['run_id']
        response = track_run_to_completion(run_id, **kwargs)
    return response
Here's my unit test method
@patch('service.request.Request.post')
def test_post_data(self, mock_post):
    """Call ``service.post_data`` with ``Request.post`` patched and print the result."""
    kwargs = {'a':'abc'}
    expected = json.dumps({'request':{'status':'ERROR'},'data':{}})
    mock_post.return_value = MagicMock(status_code=200, response=expected)
    # NOTE(review): this assertion runs before post_data() is called; kept in
    # the order given in the question - confirm the intended placement.
    mock_post.assert_called_once_with({'action': 'trigger'}, 'a/abc', '{"data": {}}') # SUCCESS!
    result = service.post_data(**kwargs)
    print(result)
When I print the result, I was expecting to see the json, but get <MagicMock name='post()' id='4488707600'>. What am I missing here? I'm new to Python and started writing unit tests for an existing application.

Grinder JDBC test script error: "The result of 'TestRunner()' is not callable"

I use the JDBC.py script to run performance tests. The Grinder log shows:
2015-10-14 18:42:40,132 ERROR com-0 thread-24: aborting thread - {}The result of 'TestRunner()' is not callable
net.grinder.scriptengine.jython.JythonScriptExecutionException: The result of 'TestRunner()' is not callable
at net.grinder.scriptengine.jython.JythonScriptEngine.createWorkerRunnable(JythonScriptEngine.java:183) ~[grinder-core-3.11.jar:na]
at net.grinder.engine.process.GrinderProcess$ThreadStarterImplementation$2.create(GrinderProcess.java:784) ~[grinder-core-3.11.jar:na]
at net.grinder.engine.process.GrinderThread.run(GrinderThread.java:90) ~[grinder-core-3.11.jar:na]
at java.lang.Thread.run(Thread.java:744) [na:1.7.0_45]
2015-10-14 18:42:40,132 ERROR com-0 thread-3: aborting thread - {}The result of 'TestRunner()' is not callable
net.grinder.scriptengine.jython.JythonScriptExecutionException: The result of 'TestRunner()' is not callable
at net.grinder.scriptengine.jython.JythonScriptEngine.createWorkerRunnable(JythonScriptEngine.java:183) ~[grinder-core-3.11.jar:na]
at net.grinder.engine.process.GrinderProcess$ThreadStarterImplementation$2.create(GrinderProcess.java:784) ~[grinder-core-3.11.jar:na]
at net.grinder.engine.process.GrinderThread.run(GrinderThread.java:90) ~[grinder-core-3.11.jar:na]
at java.lang.Thread.run(Thread.java:744) [na:1.7.0_45]
I modified the script, but I still get the error. Please help me check it.
My test script:
# The sorting test supports a configurable array length.
# It runs the JavaTest.sort method of the JavaTest class.
from net.grinder.script.Grinder import grinder
from net.grinder.script import Test
from datetime import datetime
from datetime import timedelta
from java.sql import DriverManager
from oracle.jdbc import OracleDriver
########################################
#
# main body of test script starts here
#
########################################
# Get the properties to access test configuration information
properties = grinder.getProperties()
# The description is a property (instead of a hardcoded string in this script)
#test = Test(1, properties.get("javatest.description"))
test = Test(2, properties.get("javatest.description"))
# select the method for which to collect information
# test.record(WriteMulitpleLittleFile.write)
# initialize data for compressing
# fileName = properties.get("javatest.fileToCompress")
# grinder.logger.info("data file to compress is " + fileName)
# JavaTest.initializeCompression(fileName)
# If the run mode is runOnce, the TestRunner class will
# run once. Otherwise, if the run mode is continuous,
# the TestRunner class will run the test for at least
# the specified duration (but possibly longer)
runMode = properties.get("javatest.runMode")
#WriteMulitpleLittleFile.setParameters(dir, fileSize...)
if runMode == "continuous":
    # figure out how long to run the test
    m = int(properties.getProperty("javatest.durationMinutes", "0"))
    h = int(properties.getProperty("javatest.durationHours", "0"))
    d = int(properties.getProperty("javatest.durationDays", "0"))
    duration = timedelta(minutes=m,hours=h,days=d)
    grinder.logger.info("run mode is continuous, duration is " + str(duration))
elif runMode == "runOnce":
    grinder.logger.info("run mode is run once")
    duration = timedelta(minutes=0)
else:
    # Any other value (including an unset property) behaves like runOnce.
    grinder.logger.info("run mode not set or not recongized, default to run once")
    duration = timedelta(minutes=0)
########################################
#
# The TestRunner class is used by The Grinder to perform the test
#
########################################
#test1 = Test(1, "Database insert")
test2 = Test(2, "Database query")
# Load the Oracle JDBC driver.
DriverManager.registerDriver(OracleDriver())
def getConnection():
    """Open a new JDBC connection to the Oracle test database.

    The URL uses the Oracle thin-driver form
    ``jdbc:oracle:thin:@host:port:SID``.
    """
    return DriverManager.getConnection(
        "jdbc:oracle:thin:@den00bvr.us.oracle.com:1521:orcl", "PBPUBLIC", "PBPUBLIC")
def ensureClosed(object):
    """Close ``object``, ignoring any error raised while doing so.

    Also tolerates ``None`` (the resulting error is swallowed too), so
    ``finally`` blocks can call it unconditionally.
    """
    # Deliberately best-effort: a failed close must not mask the outcome of
    # the work done with the resource.
    # NOTE(review): the bare except is kept from the original; under Jython
    # close() may raise Java exceptions - confirm before narrowing it.
    try:
        object.close()
    except:
        pass
# One time initialisation that cleans out old data.
connection = getConnection()
statement = connection.createStatement()
#try: statement.execute("drop table grinder_test1126")
#except: pass
#statement.execute("create table grinder_test1126(thread number, run number)")
ensureClosed(statement)
ensureClosed(connection)
class TestRunner:
    """Repeatedly runs the employee query until the configured duration has elapsed."""

    def __init__(self):
        # tid = grinder.threadNumber
        # if (grinder.threadNumber % 2 == 0):
        #     Even threadNumber
        #     Do insertStatement
        # else:
        #     Odd threadNumber
        #     Do queryStatement
        # def __call__(self):
        #     self.testRunner()
        endTime = datetime.now() + duration
        notDone = True
        while notDone:
            connection = None
            insertStatement = None
            queryStatement = None
            # Evaluated before the work, so the loop body always runs at
            # least once, even when duration is zero.
            notDone = datetime.now() < endTime
            try:
                connection = getConnection()
                # insertStatement = connection.createStatement()
                queryStatement = connection.createStatement()
                # test1.record(insertStatement)
                # insertStatement.execute("insert into grinder_test1126 values(%d, %d)" %
                #                         (grinder.threadNumber, grinder.runNumber))
                test2.record(queryStatement)
                queryStatement.execute("select * from employee")
            finally:
                # ensureClosed(insertStatement)
                ensureClosed(queryStatement)
                ensureClosed(connection)

According to the documentation,
The TestRunner instance must be callable
A Python object is callable if it defines a __call__ method. Each
worker thread performs a number of runs of the test script, as
configured by the property grinder.runs. For each run, the worker
thread calls its TestRunner; thus the __call__ method can be thought
of as the definition of a run.
Your TestRunner class needs to define a __call__ method in order to be callable.

what's wrong with this python code

This is code for a web crawler.
I'm a beginner at Python, so I don't know how to solve this.
Something seems to be wrong with the search() call.
# -*- coding:utf-8 -*-
import urllib,urllib2,re
class BDTB:
    """Minimal Baidu Tieba thread fetcher (Python 2, uses ``urllib2``)."""

    def __init__(self, baseUrl, seeLZ):
        """Store the thread URL and the ``see_lz`` query parameter.

        Args:
            baseUrl: URL of the thread, without a query string.
            seeLZ: Value for the ``see_lz`` query parameter.
        """
        self.baseUrl = baseUrl
        # The parameter needs "=" between its name and value.
        self.seeLZ = '?see_lz=' + str(seeLZ)

    def getPage(self, pageNum):
        """Open page ``pageNum`` of the thread.

        Returns:
            The object returned by ``urllib2.urlopen``, or ``None`` when the
            request fails with ``urllib2.URLError``.
        """
        try:
            url = self.baseUrl + self.seeLZ + '&pn=' + str(pageNum)
            request = urllib2.Request(url)
            response = urllib2.urlopen(request)
            #print response.read().decode('utf-8')
            return response
        except urllib2.URLError as e:
            if hasattr(e, 'reason'):
                # Single-argument prints parse the same on Python 2 and 3.
                print(u'连接百度贴吧失败，错误原因')
                print(e.reason)
            return None

    def getTitle(self):
        """Search page 1 for the thread title; print and return it, or return ``None``."""
        page = self.getPage(1)
        pattern = re.compile('<h3 class.*?px">(.*?)</h3>', re.S)
        result = re.search(pattern, page)
        if result:
            print(result.group(1))
            return result.group(1).strip()
        else:
            return None
baseURL = 'http://tieba.baidu.com/p/4095047339'
bdtb = BDTB(baseURL,1)
bdtb.getTitle()

This will raise a TypeError: expected string or buffer because you are passing the object returned from urllib2.urlopen(request) to re.search() when it requires an str.
If you change the return value from:
return response # returns the object
to one that returns the text contained in the response:
return response.read() # returns the text contained in the response
Your script works and after executing it returns:
广告兼职及二手物品交易集中贴
Additionally, since you're working with Python 2.x you might want to change your class definition from class BDTB: to class BDTB(object): in order to use new-style classes.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How do I properly test a Scrapy spider Python generator function? - python

Related

Mock.patch returning MagicMock object causing AssertionError?

Flask loop takes long time to complete

Python Mock: Result of method call is __getitem__ instead of actual value

Grinder JDBC test script error: "The result of 'TestRunner()' is not callable"

what's wrong with this python code

Categories

Resources

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How do I properly test a Scrapy spider Python generator function? - python

Related

Mock.patch returning MagicMock object causing AssertionError?

Flask loop takes long time to complete

Python Mock: Result of method call is __getitem__ instead of actual value

Grinder JDBC test script error: "The result of 'TestRunner()' is not callable"

what's wrong with this python code

Categories

Resources

Python Mock: Result of method call is __getitem__ instead of actual value