This question already has answers here:
Flask view return error "View function did not return a response"
(3 answers)
Closed 11 months ago.
Yesterday I started learning Flask and ran into a pitfall when redirecting with a variable. I also tried it without the session, but I can't get any further.
Code is as below -
from flask import Flask, render_template, request, redirect, url_for, session
from flask_wtf import Form
from wtforms import StringField
app = Flask(__name__)
app.config['SECRET_KEY'] = 'our very hard to guess secretfir'
@app.route('/')
def index():
    """Render the landing page from the ``index.html`` template."""
    return render_template('index.html')
@app.route('/thank-you')
def thank_you():
    """Show the thank-you page with the translated text.

    Only the POST branch returns a response. The route is registered without
    a ``methods`` argument, and any request that is not a POST falls off the
    end of the function, so the view returns ``None`` for it.
    """
    if request.method == 'POST':
        # Both lookups target the same value; the session one wins because
        # it is assigned last.
        messages = request.args['translated_text']  # counterpart for url_for()
        messages = session['translated_text']  # counterpart for session
        return render_template('thank-you.html', messages=messages)
def translate_text(target, text):
    """Translate ``text`` into the ``target`` language via Google Cloud Translation.

    Args:
        target: Language code handed to the client as ``target_language``.
        text: Text to translate; ``bytes`` input is decoded as UTF-8 first.

    Returns:
        The ``translatedText`` entry of the API result, passed through
        ``format()``.
    """
    # Imported inside the function, as in the original snippet.
    from google.cloud import translate_v2 as translate

    translate_client = translate.Client()

    # On Python 3 ``six.binary_type`` is simply ``bytes``, so the extra
    # dependency on ``six`` is not needed.
    if isinstance(text, bytes):
        text = text.decode("utf-8")

    # Text can also be a sequence of strings, in which case this method
    # will return a sequence of results for each text.
    # NOTE(review): the key lookup below only works for the single-string
    # case, where the result is one mapping.
    result = translate_client.translate(text, target_language=target)
    output_text = format(result["translatedText"])
    return output_text
# approach using WTForms
class RegistrationForm(Form):
    """WTForms form holding the three text inputs read by ``translate``."""

    # Source language; the view expects exactly two characters.
    input_lang = StringField('Input Language in length(2) ==> ')
    # Target language; the view expects exactly two characters.
    output_lang = StringField('Output Language in length(2) ==> ')
    # Text to translate; the view rejects an empty value.
    input_text = StringField('Input Text ==> ')
@app.route('/translate', methods=['GET', 'POST'])
def translate():
    """Show the translation form and, on a valid POST, translate and redirect.

    A GET request, or a POST with invalid inputs, re-renders
    ``translate.html`` (with an error message in the latter case). A valid
    POST stores the translation in the session and redirects to
    ``thank_you``.
    """
    error = ""
    form = RegistrationForm(request.form)
    if request.method == 'POST':
        input_lang = form.input_lang.data
        output_lang = form.output_lang.data
        input_text = form.input_text.data
        if len(input_lang) != 2 or len(output_lang) != 2 or len(input_text) == 0:
            error = "Please supply proper inputs! "
        else:
            translated_text = translate_text(output_lang, input_text)
            session['translated_text'] = translated_text
            # NOTE(review): the keyword is spelled ``transalted_text``, so the
            # value travels under that query-parameter name, while
            # ``thank_you`` looks up ``translated_text``.
            return redirect(url_for('thank_you', transalted_text=translated_text))
    return render_template('translate.html', form=form, message=error)
# Run the application (only when executed as a script, so that importing
# this module does not start the development server).
if __name__ == '__main__':
    app.run(debug=True)
Whenever I submit translate.html, I get the following error:
127.0.0.1 - - [03/Apr/2022 13:35:04] "GET /thank-you?transalted_text=salut HTTP/1.1" 500 -
Traceback (most recent call last):
File "C:\Dev\Python\Python310\lib\site-packages\flask\app.py", line 2095, in __call__
return self.wsgi_app(environ, start_response)
File "C:\Dev\Python\Python310\lib\site-packages\flask\app.py", line 2080, in wsgi_app
response = self.handle_exception(e)
File "C:\Dev\Python\Python310\lib\site-packages\flask\app.py", line 2077, in wsgi_app
response = self.full_dispatch_request()
File "C:\Dev\Python\Python310\lib\site-packages\flask\app.py", line 1526, in full_dispatch_request
return self.finalize_request(rv)
File "C:\Dev\Python\Python310\lib\site-packages\flask\app.py", line 1545, in finalize_request
response = self.make_response(rv)
File "C:\Dev\Python\Python310\lib\site-packages\flask\app.py", line 1701, in make_response
raise TypeError(
TypeError: The view function for 'thank_you' did not return a valid response. The function either returned None or ended without a return statement.
127.0.0.1 - - [03/Apr/2022 13:35:04] "GET /thank-you?__debugger__=yes&cmd=resource&f=style.css HTTP/1.1" 304 -
127.0.0.1 - - [03/Apr/2022 13:35:04] "GET /thank-you?__debugger__=yes&cmd=resource&f=debugger.js HTTP/1.1" 304 -
127.0.0.1 - - [03/Apr/2022 13:35:04] "GET /thank-you?__debugger__=yes&cmd=resource&f=console.png HTTP/1.1" 304 -
TypeError: The view function for 'thank_you' did not return a valid response. The function either returned None or ended without a return statement.
If you look at the thank_you function, it only knows how to handle a POST request; for a GET request it falls through and returns None.
@app.route('/thank-you')
def thank_you():
    """Return a response for every request, not only for POST.

    The POST branch renders the thank-you page; every other request reaches
    the final ``return`` instead of falling off the end of the function.
    """
    if request.method == 'POST':
        messages = request.args['translated_text']  # counterpart for url_for()
        messages = session['translated_text']  # counterpart for session
        return render_template('thank-you.html', messages=messages)
    # move the logic for GET request here
    return {'msg': 'example'}  # I assume that you are working with flask 2.0
And now you are returning for a GET request.
And if you are on flask 2.0, you could also specify the http method in the app decorator. For more clarity:
@app.get('/thank-you')
def thank_you():
    """Answer GET requests on ``/thank-you`` with a plain-text body."""
    return 'Thank you'
I am running all the SQL scripts under the scripts path in a for loop, copying the results into the @priya_stage stage in Snowflake, and then using the GET command to unload the data from the stage to my Unix path in CSV format. But I am getting an error.
Note: this same code works on my Mac but not on the Unix server.
import logging
import os
import snowflake.connector
from snowflake.connector import DictCursor as dict
from os import walk
# Run every SQL script below ``scripts``, copy each result set into the
# @priya_stage stage and download the staged files to the local inbound
# directory.
#
# ``conn`` starts as None so that the ``finally`` clause is safe even when
# ``connect()`` itself raises.
conn = None
try:
    conn = snowflake.connector.connect(
        account='xxx',
        user='xxx',
        password='xxx',
        database='xxx',
        schema='xxx',
        warehouse='xxx',
        role='xxx',
    )
    conn.cursor().execute('USE WAREHOUSE xxx')
    conn.cursor().execute('USE DATABASE xxx')
    conn.cursor().execute('USE SCHEMA xxx')

    take = []
    scripts = '/xxx/apps/xxx/xxx/scripts/snow/scripts/'
    os.chdir('/xxx/apps/xxx/xxx/scripts/snow/scripts/')
    for root, dirs, files in walk(scripts):
        for file in files:
            # Drops the last four characters of the file name
            # (presumably the ".sql" extension -- verify).
            inbound = file[0:-4]
            # Join with ``root`` so files in sub-directories are found too,
            # and close the handle as soon as the script text is read.
            with open(os.path.join(root, file), 'r') as sql_file:
                sql = sql_file.read()
            file_prefix = 'bridg_' + inbound
            file_name = file_prefix

            # ``dict`` is the DictCursor alias imported at the top of the
            # file, not the builtin.
            result_query = conn.cursor(dict).execute(sql)
            query_id = result_query.sfqid

            # Stage the result of the query that has just been run.
            sql_copy_into = f'''
            copy into @priya_stage/{file_name}
            from (SELECT * FROM TABLE(RESULT_SCAN('{query_id}')))
            DETAILED_OUTPUT = TRUE
            HEADER = TRUE
            SINGLE = FALSE
            OVERWRITE = TRUE
            max_file_size=4900000000'''
            rs_copy_into = conn.cursor(dict).execute(sql_copy_into)

            # With DETAILED_OUTPUT each returned row names one staged file;
            # fetch every one of them to the local directory.
            for row_copy in rs_copy_into:
                file_name_in_stage = row_copy["FILE_NAME"]
                sql_get_to_local = f"""
                GET @priya_stage/{file_name_in_stage} file:///xxx/apps/xxx/xxx/inbound/zip_files/{inbound}/"""
                rs_get_to_local = conn.cursor(dict).execute(sql_get_to_local)
except snowflake.connector.errors.ProgrammingError as e:
    print('Error {0} ({1}): {2} ({3})'.format(e.errno , e.sqlstate , e.msg , e.sfqid))
finally:
    # Closing the connection is enough; opening a fresh cursor only to close
    # it again did nothing.
    if conn is not None:
        conn.close()
Error
Traceback (most recent call last):
File "Generic_local.py", line 52, in <module>
rs_get_to_local = conn.cursor(dict).execute(sql_get_to_local)
File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/cursor.py", line
746, in execute
sf_file_transfer_agent.execute()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/file_transfer_agent.py", line 379, in execute
self._transfer_accelerate_config()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/file_transfer_agent.py", line 671, in
_transfer_accelerate_config
self._use_accelerate_endpoint = client.transfer_accelerate_config()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/s3_storage_client.py", line 572, in
transfer_accelerate_config
url=url, verb="GET", retry_id=retry_id, query_parts=dict(query_parts)
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/s3_storage_client.py", line 353, in _.
send_request_with_authentication_and_retry
verb, generate_authenticated_url_and_args_v4, retry_id
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/storage_client.py", line 313, in
_send_request_with_retry
f"{verb} with url {url} failed for exceeding maximum retries."
snowflake.connector.errors.RequestExceedMaxRetryError: GET with url b'https://xxx-
xxxxx-xxx-x-customer-stage.xx.amazonaws.com/https://xxx-xxxxx-xxx-x-customer-
stage.xx.amazonaws.com/?accelerate' failed for exceeding maximum retries.
This link redirects me to an error message.
https://xxx-
xxxxx-xxx-x-customer-stage.xx.amazonaws.com/https://xxx-xxxxx-xxx-x-customer-
stage.xx.amazonaws.com/?accelerate
Access Denied error :
<Error>
<Code>AccessDenied</Code>
<Message>Access Denied</Message>
<RequestId>1X1Z8G0BTX8BAHXK</RequestId>
<HostId>QqdCqaSK7ogAEq3sNWaQVZVXUGaqZnPv78FiflvVzkF6nSYXTSKu3iSiYlUOU0ka+0IMzErwGC4=</HostId>
</Error>
I have a web app (using flask and python) that can read a QR code from a live video stream (using opencv). Is it possible to save the data that has been read to my MySQL database without stopping the stream? Every time I try to call the function to save the data, the stream hangs, giving me a NoneType error. I tried calling the same function outside the stream and it works.
In my model.py, I have this function. :
def saveAttendance(qr_code):
    """Insert an attendance row for the user owning ``qr_code``.

    Looks the user up by QR code, inserts a row into ``attendance`` with the
    current time, commits, and returns the user's first and last name joined
    by a space.

    ``mysql.connection`` must be available at call time; in the traceback
    quoted with this snippet it is ``None`` when the function is called from
    the video stream.
    """
    cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
    try:
        cursor.execute('SELECT user_id, first_name, last_name FROM users WHERE qr_code = %s', [qr_code])
        user_record = cursor.fetchone()
        user_id = user_record['user_id']
        name = user_record['first_name'] + " " + user_record['last_name']
        cursor.execute('INSERT INTO attendance VALUES (NULL, %s, NOW(), NOW())', [user_id])
        mysql.connection.commit()
    finally:
        # Release the cursor even when one of the statements above raises.
        cursor.close()
    return name
in my views.py, I have this:
@views.route('/MaskandQRCodeDetection')
def mask():
    """Render the detection page, passing the session's username to the template."""
    return render_template('MaskandQRCodeDetection.html', username=session['username'])
def gen(camera):
    """Yield an endless sequence of multipart JPEG chunks read from ``camera``.

    Each chunk wraps the first element returned by ``camera.get_frame()`` in
    a ``--frame`` boundary and an ``image/jpeg`` content-type header.
    """
    prefix = b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n'
    suffix = b'\r\n\r\n'
    while True:
        frame = camera.get_frame()[0]
        yield (prefix + frame + suffix)
@views.route('/video_feed')
def video_feed():
    """Stream the chunks produced by ``gen`` for a new ``VideoCamera`` as a multipart response."""
    return Response(gen(VideoCamera()), mimetype='multipart/x-mixed-replace; boundary=frame')
And in my camera.py, I have this:
class VideoCamera(object):
# initialize the video stream
def __init__(self):
self.stream = VideoStream(src=0).start()
def __del__(self):
self.stream.stop()
def get_frame(self):
.
.
.
.
#And this part reads the qr code:
for barcode in decode(frame):
global prev, myOutput, myData
myData = barcode.data.decode('utf-8')
#I saved all the QR code in my database to qr_list
if myData in qr_list:
# check if mask is detected and if the qr_code is not a duplicate
if label == 'Mask' and prev != myData:
# set myOutput with the name of the detected user
#This is where I have the problem
myOutput = saveAttendance(myData)
myColor = (0, 255, 0)
# update previous qr_code with the current qr_code
prev = myData
else:
myOutput = 'Not recognized'
myColor = (0, 0, 255)
It gives me the following error when I call the function inside the videostream
Debugging middleware caught exception in streamed response at a point where response headers were already sent.
Traceback (most recent call last):
File "C:\Users\Acer\AppData\Local\Programs\Python\Python38\Lib\site-packages\werkzeug\wsgi.py", line 506, in __next__
return self._next()
File "C:\Users\Acer\AppData\Local\Programs\Python\Python38\Lib\site-packages\werkzeug\wrappers\base_response.py", line 45, in _iter_encoded
for item in iterable:
File "C:\AI\website\views.py", line 78, in gen
data = camera.get_frame()
File "C:\AI\website\camera.py", line 157, in get_frame
myOutput = saveAttendance(myData)
File "C:\AI\website\model.py", line 117, in saveAttendance
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
AttributeError: 'NoneType' object has no attribute 'cursor'
Thank you so much. Any response will be greatly appreciated.
Solved this by creating an app context inside the model.py function. It turned out that, without an active app context, mysql.connection returns None.
def saveAttendance(qr_code):
with app.app_context():
.
.
.
I am trying to write a Python model that does some processing in a PostgreSQL database using the multiprocessing module and peewee.
In single core mode the code works, however, when I try to run the code with multiple cores I am running into a SSL error.
I would like to post the structure of my model in the hope that somebody can advise how to set up my model properly. Currently, I have chosen an object-oriented approach in which I make one connection that is shared in a pool. To clarify what I have done, I will now show the source code I have so far.
I have three files: main.py, models.py and parser.py. Their contents are as follows.
models.py defines the peewee postgresql table and makes a connection to the postgres server
import peewee as pw
from playhouse.pool import PooledPostgresqlExtDatabase
KVK_KEY = "id_number"
NAME_KEY = "name"
N_VOWELS_KEY = "n_vowels"
# initialise the data base
database = PooledPostgresqlExtDatabase(
"testdb", user="postgres", host="localhost", port=5432, password="xxxx",
max_connections=8, stale_timeout=300 )
class BaseModel(pw.Model):
    """Shared base class that binds its subclasses to the module-level ``database``."""

    class Meta:
        database = database
        only_save_dirty = True
# Layout of the company table in the SQL database.
class Company(BaseModel):
    """One company record, keyed by its id number."""

    id_number = pw.IntegerField(primary_key=True)
    name = pw.CharField(null=True)
    # -1 is the default until a value has been stored.
    n_vowels = pw.IntegerField(default=-1)
    # -1 is the default until a worker has written its index.
    processor = pw.IntegerField(default=-1)
def connect_database(database_name, reset_database=False):
    """Connect the module-level database and make sure the Company table exists.

    Args:
        database_name: Accepted but not used by this function.
        reset_database: When true, drop the Company table before it is
            (re)created.
    """
    tables = [Company]
    database.connect()
    if reset_database:
        database.drop_tables(tables)
    database.create_tables(tables)
parser.py contains the CompanyParser class which is used as the engine of the code to do all the processing. It generates some artificial data which is stored to the postgresql database and then the run method is used to do some processing with the data already stored in the database
import pandas as pd
import numpy as np
import random
import string
import peewee as pw
from models import (Company, database, KVK_KEY, NAME_KEY)
import multiprocessing as mp
MAX_SQL_CHUNK = 1000
np.random.seed(0)
def random_name(size=8, chars=string.ascii_lowercase):
    """Return a string of 'size' characters, each drawn at random from 'chars'."""
    picked = [random.choice(chars) for _ in range(size)]
    return "".join(picked)
def vowel_count(characters):
    """
    Count the number of vowels in the string 'characters' and return as an integer

    Only the lowercase letters a, e, i, o and u are counted.
    """
    # Build the lookup once instead of once per character.
    vowels = tuple("aeiou")
    return sum(1 for char in characters if char in vowels)
class CompanyParser(mp.Process):
    """Generate fake company data and fill in the vowel count per company.

    The same class serves two roles: created without ``i_proc`` it is a plain
    helper for generating and storing data; created with ``i_proc`` and more
    than one process it is a ``multiprocessing.Process`` whose ``run``
    handles the id range ``first_id`` .. ``last_id``.

    All database access goes through the ``database`` and ``Company``
    objects imported from ``models``.
    """

    def __init__(self, number_of_companies=100, i_proc=None,
                 number_of_procs=1,
                 first_id=None, last_id=None):
        # The Process machinery is only initialised for real sub-processes.
        if i_proc is not None and number_of_procs > 1:
            mp.Process.__init__(self)

        self.i_proc = i_proc
        self.number_of_procs = number_of_procs
        self.n_companies = number_of_companies
        self.data_df: pd.DataFrame = None

        self.first_id = first_id
        self.last_id = last_id

    def generate_data(self):
        """ Create a dataframe with fake company data and id's """
        id_list = np.random.randint(1000000, 9999999, self.n_companies)
        company_list = np.array([random_name() for _ in range(self.n_companies)])
        self.data_df = pd.DataFrame(data=np.vstack([id_list, company_list]).T,
                                    columns=[KVK_KEY, NAME_KEY])
        # The ids are random, so the same id can be drawn twice; keep one row
        # per id because the id is the primary key of the table.
        self.data_df.drop_duplicates([KVK_KEY], inplace=True)
        self.data_df.sort_values([KVK_KEY], inplace=True)

    def store_to_database(self):
        """
        Store the company data to a sql database
        """
        record_list = list(self.data_df.to_dict(orient="index").values())
        n_batch = int(len(record_list) / MAX_SQL_CHUNK) + 1
        # Insert in chunks of MAX_SQL_CHUNK records inside one transaction.
        with database.atomic():
            for cnt, batch in enumerate(pw.chunked(record_list, MAX_SQL_CHUNK)):
                print(f"writing {cnt}/{n_batch}")
                Company.insert_many(batch).execute()

    def run(self):
        """Count the vowels of every company in this worker's id range and save them."""
        print("Making query at {}".format(self.i_proc))
        query = (Company.
                 select().
                 where(Company.id_number.between(self.first_id, self.last_id)))
        print("Found {} companies".format(query.count()))
        for cnt, company in enumerate(query):
            print("Processing # {} - {}: company {}/{}".format(self.i_proc, cnt,
                                                               company.id_number,
                                                               company.name))
            number_of_vowels = vowel_count(company.name)
            company.n_vowels = number_of_vowels
            company.processor = self.i_proc
            print(f"storing number of vowels: {number_of_vowels}")
            company.save()
Finally, my main script loads the classes defined in models.py and parser.py and launches the code.
from models import (Company, connect_database)
from parser import CompanyParser
number_of_processors = 2

# Connect from the main process and start with a freshly created table.
connect_database(None, reset_database=True)

# init an object of the CompanyParser and use the create database
parser = CompanyParser()
company_ids = Company.select(Company.id_number)
parser.generate_data()
parser.store_to_database()

n_companies = company_ids.count()
n_comp_per_proc = int(n_companies / number_of_processors)
print("Found {} companies: {} per proc".format(n_companies, n_comp_per_proc))

# Hand each worker one contiguous slice of n_comp_per_proc companies.
for i_proc in range(number_of_processors):
    i_start = i_proc * n_comp_per_proc
    first_id = company_ids[i_start]
    last_id = company_ids[i_start + n_comp_per_proc - 1]
    print(f"Running proc {i_proc} for id {first_id} until id {last_id}")
    sub_parser = CompanyParser(first_id=first_id, last_id=last_id,
                               i_proc=i_proc,
                               number_of_procs=number_of_processors)
    # With a single processor the work runs inline in this process.
    if number_of_processors > 1:
        sub_parser.start()
    else:
        sub_parser.run()
In case that the number_of_processors = 1 this script works perfectly fine. It generates artificial data, stores it to the PostgreSQL database and does some processing on the data (it counts the number of vowels in the name and stores it to the n_vowels column)
However, in case I am trying to run this with 2 cores with number_of_processors = 2, I run into the following error
/opt/miniconda3/bin/python /home/eelco/PycharmProjects/multiproc_peewee/main.py
writing 0/1
Found 100 companies: 50 per proc
Running proc 0 for id 1020737 until id 5295565
Running proc 1 for id 5302405 until id 9891087
Making query at 0
Found 50 companies
Processing # 0 - 0: company 1020737/wqrbgxiu
storing number of vowels: 2
Making query at 1
Process CompanyParser-1:
Processing # 0 - 1: company 1086107/lkbagrbc
storing number of vowels: 1
Processing # 0 - 2: company 1298367/nsdjsqio
storing number of vowels: 2
Traceback (most recent call last):
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2714, in execute_sql
cursor.execute(sql, params or ())
psycopg2.OperationalError: SSL error: sslv3 alert bad record mac
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/miniconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/eelco/PycharmProjects/multiproc_peewee/parser.py", line 82, in run
company.save()
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 5748, in save
rows = self.update(**field_dict).where(self._pk_expr()).execute()
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1625, in inner
return method(self, database, *args, **kwargs)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1696, in execute
return self._execute(database)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2121, in _execute
cursor = database.execute(self)
File "/opt/miniconda3/lib/python3.7/site-packages/playhouse/postgres_ext.py", line 468, in execute
cursor = self.execute_sql(sql, params, commit=commit)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2721, in execute_sql
self.commit()
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2512, in __exit__
reraise(new_type, new_type(*exc_args), traceback)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 186, in reraise
raise value.with_traceback(tb)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2714, in execute_sql
cursor.execute(sql, params or ())
peewee.OperationalError: SSL error: sslv3 alert bad record mac
Process CompanyParser-2:
Traceback (most recent call last):
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2714, in execute_sql
cursor.execute(sql, params or ())
psycopg2.OperationalError: SSL error: decryption failed or bad record mac
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/miniconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/eelco/PycharmProjects/multiproc_peewee/parser.py", line 72, in run
print("Found {} companies".format(query.count()))
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1625, in inner
return method(self, database, *args, **kwargs)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1881, in count
return Select([clone], [fn.COUNT(SQL('1'))]).scalar(database)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1625, in inner
return method(self, database, *args, **kwargs)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1866, in scalar
row = self.tuples().peek(database)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1625, in inner
return method(self, database, *args, **kwargs)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1853, in peek
rows = self.execute(database)[:n]
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1625, in inner
return method(self, database, *args, **kwargs)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1696, in execute
return self._execute(database)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 1847, in _execute
cursor = database.execute(self)
File "/opt/miniconda3/lib/python3.7/site-packages/playhouse/postgres_ext.py", line 468, in execute
cursor = self.execute_sql(sql, params, commit=commit)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2721, in execute_sql
self.commit()
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2512, in __exit__
reraise(new_type, new_type(*exc_args), traceback)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 186, in reraise
raise value.with_traceback(tb)
File "/opt/miniconda3/lib/python3.7/site-packages/peewee.py", line 2714, in execute_sql
cursor.execute(sql, params or ())
peewee.OperationalError: SSL error: decryption failed or bad record mac
Process finished with exit code 0
Somehow something goes wrong as soon as the second process starts to do something with the database. Does somebody have advice on how to get this code working? I have already tried the following:
Try the PooledPostgresDatabase and normal PostgresqlDatabase to
connect to the database. This leads to the same error
Try using SQLite instead of Postgres. This works for 2 cores, but only if the two processes are not interfering too much; otherwise I
get some locking problems. I was under the impression that Postgres
would be better for multiprocessing than SQLite (is that true?)
When putting a break after launching the first process(so effectively using only one core), the code works, showing that the start method is called correctly.
Hopefully somebody can advise.
Regards
Eelco
After some searching on the internet today I found the solution for my problem here:github.com/coleifer. As coleifer mentions: you apparently first have to set up all the forks before you start connecting to the database. Based on this idea I have modified my code and it is working now.
For those interested, I will post my Python scripts again so you can see how I did it. I do this because there are not many explicit examples out there, so perhaps it may help others.
First of all, all the database and peewee modules are now moved into initialization functions which are only called inside the constructor of the CompanyParser class.
So models.py looks like
import peewee as pw
from playhouse.pool import PooledPostgresqlExtDatabase, PostgresqlDatabase, PooledPostgresqlDatabase
KVK_KEY = "id_number"
NAME_KEY = "name"
N_VOWELS_KEY = "n_vowels"
def init_database():
    """Create and return a new pooled PostgreSQL database object for ``testdb``."""
    return PooledPostgresqlDatabase(
        "testdb", user="postgres", host="localhost", port=5432, password="xxxxx",
        max_connections=8, stale_timeout=300)
def init_models(db, reset_tables=False):
    """Define the models bound to ``db``, prepare the table and return ``Company``.

    Args:
        db: Database object the model classes are attached to; it is
            connected here if it is currently closed.
        reset_tables: When true and the Company table already exists, the
            table is dropped before being created again.

    Returns:
        The ``Company`` model class bound to ``db``.
    """

    class BaseModel(pw.Model):
        class Meta:
            database = db

    # this class describes the format of the sql data base
    class Company(BaseModel):
        id_number = pw.IntegerField(primary_key=True)
        name = pw.CharField(null=True)
        n_vowels = pw.IntegerField(default=-1)
        processor = pw.IntegerField(default=-1)

    if db.is_closed():
        db.connect()

    tables = [Company]
    if reset_tables and Company.table_exists():
        db.drop_tables(tables)
    db.create_tables(tables)

    return Company
Then, the worker class CompanyParser is defined in the parser.py script and looks like this
import multiprocessing as mp
import random
import string
import numpy as np
import pandas as pd
import peewee as pw
from models import (KVK_KEY, NAME_KEY, init_database, init_models)
MAX_SQL_CHUNK = 1000
np.random.seed(0)
def random_name(size=32, chars=string.ascii_lowercase):
    """Return a string of 'size' characters, each drawn at random from 'chars'."""
    picked = [random.choice(chars) for _ in range(size)]
    return "".join(picked)
def vowel_count(characters):
    """
    Count the number of vowels in the string 'characters' and return as an integer

    Only the lowercase letters a, e, i, o and u are counted.
    """
    # Build the lookup once instead of once per character.
    vowels = tuple("aeiou")
    return sum(1 for char in characters if char in vowels)
class CompanyParser(mp.Process):
    """Generate fake company data and fill in the vowel count per company.

    Every instance creates its own database object and its own ``Company``
    model in the constructor, so no database object is shared between
    instances. An instance is used either directly (``generate_data``,
    ``store_to_database``, or ``run`` called inline) or as a process via
    ``start()``.

    Args:
        reset_tables: Passed on to ``init_models``.
        number_of_companies: Number of fake companies ``generate_data`` draws.
        i_proc: Index of this worker; written to ``Company.processor``.
        number_of_procs: Total number of workers.
        first_id: First id of the range handled by ``run``.
        last_id: Last id of the range handled by ``run``.
    """

    def __init__(self, reset_tables=False,
                 number_of_companies=100, i_proc=None,
                 number_of_procs=1, first_id=None, last_id=None):
        # Always initialise the Process base class: skipping it leaves a
        # half-constructed object whose Process methods fail with
        # AttributeError.
        super().__init__()

        self.i_proc = i_proc
        self.reset_tables = reset_tables
        self.number_of_procs = number_of_procs
        self.n_companies = number_of_companies
        self.data_df: pd.DataFrame = None

        self.first_id = first_id
        self.last_id = last_id

        # initialise the database and models
        self.database = init_database()
        self.Company = init_models(self.database, reset_tables=self.reset_tables)

    def generate_data(self):
        """ Create a dataframe with fake company data and id's and return the array of id's"""
        id_list = np.random.randint(1000000, 9999999, self.n_companies)
        company_list = np.array([random_name() for _ in range(self.n_companies)])
        self.data_df = pd.DataFrame(data=np.vstack([id_list, company_list]).T,
                                    columns=[KVK_KEY, NAME_KEY])
        # The id is the primary key, so keep a single row per id.
        self.data_df.drop_duplicates([KVK_KEY], inplace=True)
        self.data_df.sort_values([KVK_KEY], inplace=True)
        return self.data_df[KVK_KEY].values

    def store_to_database(self):
        """
        Store the company data to a sql database
        """
        record_list = list(self.data_df.to_dict(orient="index").values())
        n_batch = int(len(record_list) / MAX_SQL_CHUNK) + 1
        # Insert in chunks of MAX_SQL_CHUNK records inside one transaction.
        with self.database.atomic():
            for cnt, batch in enumerate(pw.chunked(record_list, MAX_SQL_CHUNK)):
                print(f"writing {cnt}/{n_batch}")
                self.Company.insert_many(batch).execute()

    def run(self):
        """Count the vowels of every company in this worker's id range and save them.

        A failed save is reported on stdout and the loop continues with the
        next company.
        """
        query = (self.Company.
                 select().
                 where(self.Company.id_number.between(self.first_id, self.last_id)))
        for cnt, company in enumerate(query):
            print("Processing # {} - {}: company {}/{}".format(self.i_proc, cnt, company.id_number,
                                                               company.name))
            number_of_vowels = vowel_count(company.name)
            company.n_vowels = number_of_vowels
            company.processor = self.i_proc
            try:
                company.save()
            except (pw.OperationalError, pw.InterfaceError) as err:
                print("failed save for {} {}: {}".format(self.i_proc, cnt, err))
Finally, the main.py script which launches the processes:
from parser import CompanyParser
import time
def main():
    """Create the test data, then process it with one worker per processor.

    The data is generated and stored by a first ``CompanyParser`` whose
    connection is closed before the workers are created. Each worker gets a
    contiguous range of ids; the last worker also takes the companies left
    over when the count is not a multiple of the number of processors.
    """
    number_of_processors = 2
    number_of_companies = 10000

    parser = CompanyParser(number_of_companies=number_of_companies, reset_tables=True)
    company_ids = parser.generate_data()
    parser.store_to_database()

    n_companies = company_ids.size
    n_comp_per_proc = int(n_companies / number_of_processors)
    print("Found {} companies: {} per proc".format(n_companies, n_comp_per_proc))

    if not parser.database.is_closed():
        parser.database.close()

    processes = list()
    started = list()
    for i_proc in range(number_of_processors):
        i_start = i_proc * n_comp_per_proc
        # The last worker runs up to the final id so that no company is
        # skipped when the division above leaves a remainder.
        if i_proc == number_of_processors - 1:
            i_stop = n_companies
        else:
            i_stop = i_start + n_comp_per_proc
        first_id = company_ids[i_start]
        last_id = company_ids[i_stop - 1]
        print(f"Running proc {i_proc} for id {first_id} until id {last_id}")
        sub_parser = CompanyParser(first_id=first_id, last_id=last_id, i_proc=i_proc,
                                   number_of_procs=number_of_processors)
        if number_of_processors > 1:
            sub_parser.start()
            started.append(sub_parser)
        else:
            sub_parser.run()
        processes.append(sub_parser)

    # this blocks the script until all processes are done; only workers that
    # were actually started can be joined
    for job in started:
        job.join()

    # make sure all the connections are closed
    for worker in processes:
        db = worker.database
        if not db.is_closed():
            db.close()

    print("Goodbye!")
if __name__ == "__main__":
start = time.time()
main()
duration = time.time() - start
print(f"Done in {duration} s")
As you can see, the database connection is done per process inside the class.
This example works and is a full example of multiprocessing + peewee and PostgreSQL. Hopefully this may help others. In case you have any comments or suggestions for improvement please let me know.
I did get this error too but with flask + peewee + rq in Heroku. Below is how I solved it:
If you have a simple app that you use with RQ, I would suggest to use SimpleWorker
RQ suggest to use rq.worker.HerokuWorker but I still received a ssl error with this.
The error appeared in a case where I have created a follow-up(chain) tasks, where execution of 1 depends on another tasks success.
Also I am using flask-rq2 but applies to normal usage as well as shown below:
# app.py
app = Flask(__name__)
app.config['RQ_WORKER_CLASS'] = os.getenv('RQ_WORKER_CLASS', 'rq.worker.Worker')
rq = RQ(app)
I solved it by changing the following in heroku config:
set your RQ_WORKER_CLASS to rq.worker.SimpleWorker
Here's the code snippet:
# Get details from the user.
jira_url = str(raw_input("JIRA URL: ")) # https://jira.mydomain.com
jira_prj = str(raw_input("JIRA Project: ")) # MYPROJ
jira_uid = str(raw_input("JIRA ID: ")) # admin
jira_pas = str(getpass.getpass("JIRA Password: ")) # p#s$wOrd!
# JIRA stuff.
# Open a client for the given server using basic authentication.
jira = JIRA(server=jira_url, basic_auth=(jira_uid, jira_pas))
# NOTE(review): the "% jira_prj" sits outside the call's parentheses, so
# search_issues() receives the query with the literal "%s" still in it and
# the formatting is applied to the call's return value instead.
search_alert_issues = jira.search_issues("project = %s AND issuetype = Alert AND status = Submitted AND reporter in (anonymous)") % jira_prj
search_alert_issues
Objectives:
Connect to JIRA server.
Run the search_alert_issues.
Expectation:
>>> jira = JIRA(server='https://jira.mydomain.com', basic_auth=('admin', 'p#s$wOrd!'))
>>> search_alert_issues = jira.search_issues("project = MYPROJ AND issuetype = Alert AND status = Submitted AND reporter in (anonymous)")
>>> search_alert_issues
[<JIRA Issue: key=u'MYPROJ-35460', id=u'129419'>, <JIRA Issue: key=u'MYPROJ-35459', id=u'129418'>, <JIRA Issue: key=u'MYPROJ-35458', id=u'129417'>, <JIRA Issue: key=u'MYPROJ-35457', id=u'129416'>, <JIRA Issue: key=u'MYPROJ-35456', id=u'129415'>, <JIRA Issue: key=u'MYPROJ-35455', id=u'129414'>, <JIRA Issue: key=u'MYPROJ-35454', id=u'129413'>, <JIRA Issue: key=u'MYPROJ-35453', id=u'129412'>, <JIRA Issue: key=u'MYPROJ-35452', id=u'129411'>, <JIRA Issue: key=u'MYPROJ-35451', id=u'129410'>]
>>>
Reality:
Traceback (most recent call last):
File "./ajat.py", line 82, in <module>
search_alert_issues = jira.search_issues("project = %s AND issuetype = Alert AND status = Submitted AND reporter in (anonymous)") % jira_prj
File "/usr/lib/python2.7/site-packages/jira/client.py", line 1587, in search_issues
resource = self._get_json('search', params=search_params)
File "/usr/lib/python2.7/site-packages/jira/client.py", line 2033, in _get_json
r = self._session.get(url, params=params)
File "/usr/lib/python2.7/site-packages/jira/resilientsession.py", line 78, in get
return self.__verb('GET', url, **kwargs)
File "/usr/lib/python2.7/site-packages/jira/resilientsession.py", line 74, in __verb
raise_on_error(r, verb=verb, **kwargs)
File "/usr/lib/python2.7/site-packages/jira/utils.py", line 120, in raise_on_error
r.status_code, error, r.url, request=request, response=r, **kwargs)
jira.utils.JIRAError: JiraError HTTP 400
text: Error in the JQL Query: The character '%' is a reserved JQL character. You must enclose it in a string or use the escape '\u0025' instead. (line 1, character 11)
Questions:
search_alert_issues should be able to use the value stored in jira_prj variable, right?
But why it isn't working? All jira_* variables are showing their values when I did a simple print command on them.
How am I going to use the value stored in jira_prj?
In
search_alert_issues = jira.search_issues("project = %s AND issuetype = Alert AND status = Submitted AND reporter in (anonymous)") % jira_prj
the % jira_prj should come before the parenthesis closes -
search_alert_issues = jira.search_issues("project = %s AND issuetype = Alert AND status = Submitted AND reporter in (anonymous)" % jira_prj)
Right now, you are sending project = %s AND issuetype = Alert AND status = Submitted AND reporter in (anonymous) to the server, and then formatting the result with the value in jira_prj. When the Jira server sees the %, it complains.
% jira_prj is in the wrong position:
search_alert_issues = jira.search_issues("project = %s AND issuetype = Alert AND status = Submitted AND reporter in (anonymous)" % jira_prj)