how avoid dublicate data to mysql - python

I write the code to scrape car info(title, make, model, transmission, year, price) data from ebay.com and save in the mysql,
I want if all row's(title, make, model, ...) item's is be similar to another row then avoid to insert this data to the mysql, *only when all row's item be similar(because some title is simialr or some model or...)
code :
import requests
from bs4 import BeautifulSoup
import re
import mysql.connector
conn = mysql.connector.connect(user='root', password='******',
host='127.0.0.1', database='web_scraping')
cursor = conn.cursor()
url = 'https://www.ebay.com/b/Cars-Trucks/6001?_ fsrp=0&_sacat=6001&LH_BIN=1&LH_ItemCondition=3000%7C1000%7C2500&rt=nc&_stpos=95125&Model%2520Year=2020%7C2019%7C2018%7C2017%7C2016%7C2015'
res = requests.get(url)
soup = BeautifulSoup(res.text, 'html.parser')
ebay_cars = soup.find_all('li', class_='s-item')
for car_info in ebay_cars:
title_div = car_info.find('div', class_='s-item__wrapper clearfix')
title_sub_div = title_div.find('div', class_='s-item__info clearfix')
title_p = title_sub_div.find('span', class_='s-item__price')
title_tag = title_sub_div.find('a', class_='s-item__link')
title_maker = title_sub_div.find('span', class_='s-item__dynamic s-
item__dynamicAttributes1')
title_model = title_sub_div.find('span', class_='s-item__dynamic s-
item__dynamicAttributes2')
title_trans = title_sub_div.find('span', class_='s-item__dynamic s-
item__dynamicAttributes3')
name_of_car = re.sub(r'\d{4}', '', title_tag.text)
maker_of_car = re.sub(r'Make: ','', title_maker.text)
model_of_car = re.sub(r'Model: ', '', title_model.text)
try:
if title_trans.text.startswith(r'Transmission: '):
trans_of_car = re.sub(r'Transmission: ', '', title_trans.text)
else:
trans_of_car = ''
except AttributeError:
trans_of_car = ''
year_of_car = re.findall(r'\d{4}', title_tag.text)
year_of_car = ''.join(str(x) for x in year_of_car)
price_of_car = title_p.text
print(name_of_car ,trans_of_car )
sql = 'INSERT INTO car_info(Title, Maker, Model, Transmission, Year, Price)
VALUES (%s, %s, %s, %s, %s, %s)'
cursor.execute(sql , (name_of_car, maker_of_car, model_of_car, trans_of_car,
year_of_car, price_of_car))
conn.commit()
conn.close()

One option uses not exists:
insert into car_info (title, maker, model, transmission, year, price)
select v.*
from (select %s title, %s maker, %s model, %s transmission, %s year, %s price) v
where not exists (
select 1
from car_info c
where
(c.title, c.maker, c.model, c.transmission, c.year, c.price)
= (v.title, v.maker, v.model, v.transmission, v.year, v.price)
);
But it would be simpler to create a unique key on all columns of the table, like:
create unique index idx_car_info_uniq
on car_info(title, maker, model, transmission, year, price);
This prevents any process from inserting duplicates in the table. You can elegantly ignore the erros that would otherwise have been raised with the on duplicate key syntax:
insert into car_info (title, maker, model, transmission, year, price)
values (%s, %s, %s, %s, %s, %s)
on duplicate key update title = values(title);

You could save the result of this query into a variable
SELECT COUNT(*) FROM car_info WHERE Title = <titleValue>, Maker = <makerValue>, Model = <modelValue>, Transmission = <transmisionValue>, Year = <yearValue>, Price = <priceValue>
and then, if the value of the variable is
1, you skip the INSERT because you already have this entry in the table
0, you make the INSERT because you do not have that entry in the table
It's just one way of doing this.

declare the primary key as all the columns in the table. See: https://www.mysqltutorial.org/mysql-primary-key/

Related

python - Error entering data into the database

A new member has been registered in the database, but when entering the profile modification page and adding other information, it is registered in a new row and not in the same information as the registered member.
register new member :
def add_users(self):
add_fullname = self.line_fullname.text()
add_username = self.line_username.text()
add_Email = self.line_email.text()
add_password = self.line_password.text()
add_phone = self.line_telephon.text()
add_birthday = self.dateofbirth.text()
add_profession = self.line_profession.text()
add_Country = self.line_country.text()
add_keyone = self.keyone.text()
add_keytwo = self.keytwo.text()
add_newpassword = self.new_pwd.text()
self.cur.execute(''' SELECT add_username FROM users=add_username''')
data = self.cur.fetchall()
self.cur.execute(''' INSERT INTO users(add_fullname,
add_username, add_Email, add_password, add_phone,
add_birthday, add_profession, add_Country, add_keyone, add_keytwo, add_newpassword)
VALUE (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s ) ''',
(add_fullname, add_username, add_Email, add_password, add_phone, add_birthday,
add_profession, add_Country, add_keyone, add_keytwo, add_newpassword))
self.mysql_db.commit()
self.line_fullname.clear()
self.line_username.clear()
self.line_email.clear()
self.line_password.clear()
this code for fill profile :
def save_profil(self):
self.tabWidget.setCurrentIndex(6)
add_fullname = self.line_fullname.text()
add_Email = self.line_email.text()
add_phone = self.line_telephon.text()
add_birthday = self.dateofbirth.text()
add_profession = self.line_profession.text()
add_Country = self.line_country.text()
add_keyone = self.keyone.text()
add_keytwo = self.keytwo.text()
add_newpassword = self.new_pwd.text()
self.cur.execute(''' INSERT INTO users(add_fullname, add_Email, add_phone, add_birthday, add_profession,
add_Country, add_keyone, add_keytwo, add_newpassword)
VALUE (%s, %s, %s, %s, %s, %s, %s, %s, %s ) ''',
(add_fullname, add_Email,
add_phone, add_birthday, add_profession,
add_Country, add_keyone, add_keytwo, add_newpassword))
self.mysql_db.commit()
print("profil edited")
You should use UPDATE instead of INSERT in the "save_profil" function if the profile already exists.
UPDATE table_name
SET column1 = value1, column2 = value2, ...
WHERE condition;
You will also need to indicate which user should be updated in
WHERE condition
I would also advise you to read the user ID or another unique key you have defined in your table. So you can update the correct user.
Ex:
def save_profil(self):
self.tabWidget.setCurrentIndex(6)
add_fullname = self.line_fullname.text()
add_Email = self.line_email.text()
add_phone = self.line_telephon.text()
add_birthday = self.dateofbirth.text()
add_profession = self.line_profession.text()
add_Country = self.line_country.text()
add_keyone = self.keyone.text()
add_keytwo = self.keytwo.text()
add_newpassword = self.new_pwd.text()
query = "UPDATE users SET add_fullname=%s, add_Email=%s, add_phone=%s, add_birthday=%s, add_profession=%s, add_Country=%s, add_keyone=%s, add_keytwo=%s, add_newpassword= %s WHERE (UNIQUE_KEY = %s)"
data = (add_fullname, add_Email,add_phone, add_birthday, add_profession,add_Country, add_keyone, add_keytwo, add_newpassword, UNIQUE_KEY_VALUE)
self.cur.execute(query,data)
self.mysql_db.commit()
print("profil edited")
Note that UNIQUE_KEY and UNIQUE_KEY_VALUE are suggestions, you must define these values according to your model.
As I can see in the figure that shows the database, you can use the "id" to know which user should be updated.For this case, UNIQUE_KEY would be replaced by id, and UNIQUE_KEY_VALUE would be the user id that needs to be updated, in your example it would be 14, but you need to get that value inside your function, just like you got the other values.

Python Executing SQL fails over due to non existing field in json data

I am getting some JSON data from a third party API. I am trying to add that data into my own database to be used for a website. I loop through each record in the JSON and execute a SQL query to insert that data into my database. However some records in the JSON data doesn't exist, and therefore causes my query to fail. I have set defaults for these fields for this reason however it still falls over.
isNonFoilOnly field will only appear in some of of the records in the JSON data.
models.py
class Set(models.Model):
code = models.CharField(max_length=100, unique=True)
keyrune_code = models.CharField(max_length=100)
name = models.CharField(max_length=100)
type = models.CharField(max_length=100)
release_date = models.DateField()
base_set_size = models.IntegerField()
total_set_size = models.IntegerField()
is_online_only = models.BooleanField(default=False)
is_non_foil_only = models.BooleanField(default=False)
is_foil_only = models.BooleanField(default=False)
sale_status = models.BooleanField(default=False)
def __str__(self):
return self.name
views.py
response = requests.request("GET", "https://mtgjson.com/api/v5/SetList.json")
data = response.json()["data"]
sorted_obj = sorted(data, key=lambda k: k['releaseDate'], reverse=False)
sql = """
INSERT INTO dashboard_set
(code, keyrune_code, name, type, release_date, base_set_size, total_set_size, is_online_only, is_non_foil_only, is_foil_only, sale_status)
VALUES
( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )
ON CONFLICT (code) DO UPDATE
SET keyrune_code = %s,
name = %s,
type = %s,
release_date = %s,
base_set_size = %s,
total_set_size = %s,
is_online_only = %s,
is_non_foil_only = %s,
is_foil_only = %s;
"""
conn = None
try:
params = config()
conn = psycopg2.connect(**params)
cur = conn.cursor()
for entry in sorted_obj:
cur.execute(sql, (
entry["code"],
entry["keyruneCode"],
entry["name"],
entry["type"],
entry["releaseDate"],
entry["baseSetSize"],
entry["totalSetSize"],
entry["isOnlineOnly"],
entry["isNonFoilOnly"],
entry["isFoilOnly"],
False,
entry["keyruneCode"],
entry["name"],
entry["type"],
entry["releaseDate"],
entry["baseSetSize"],
entry["totalSetSize"],
entry["isOnlineOnly"],
entry["isNonFoilOnly"],
entry["isFoilOnly"]
))
conn.commit()
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
return redirect('dashboard:sets')
You seem to using Django and not using it at the same time. The Django way to do this is:
from yourapp.models import Set
def yourview(request):
response = requests.request("GET", "https://mtgjson.com/api/v5/SetList.json")
data = response.json()["data"]
# Not caring about sort, because why?
for entry in data:
code = data.pop('code', None)
if not code:
continue # or raise
Set.objects.update_or_create(code=code, defaults=data)
return redirect('dashboard:sets')

write multiple foods to DB from JSON

I am currently having major issues writing foods from a JSON to my database. The GET method works just fine. However, when I receive a JSON, it is not properly writing to the database.
What I do is the following
Write a new meal to the database (creates a new meal/cart id via autoincrement)
Utilize the LAST_INSERT_ID() command to write new foods to that meal/cart.
Once I get here writing one food is fine, however if there is numerous foods, I can't seem to get it to write the other foods from the JSON to the database.
The JSON I am receiving is as follows:
Endpoint: /meallog
Request params:{
method: ”post”,
headers: headers,
url: string,
data:{
userId: string,
date: string,
mealData:{
mealName: String,
food: [
{
id:string,
foodname:string,
numCal:int,
servingSize:int,
servingSizeUnit:string,
totalCalories:int
},
{
(repeat above)
}
]
}
}
}
Response: (JSON Object)
{
code: 200/400,
message: String
}
The Code I currently have for my post is below:
elif request.method == 'POST':
jsondata = {}
code={}
user_id = request.json['user_id']
date = request.json['date']
mealName = request.json['mealName']
food_id = request.json['id']
food_name = request.json['foodname']
food_cal = request.json['numCal']
serving_size = request.json['servingSize']
serving_unit = request.json['servingSizeUnit']
totalCal = request.json['totalCalories']
postmeal = conn.cursor()
INS_meal = "INSERT INTO user_cart (user_id, datetime, cart_cal, cart_name) VALUES (%s, %s, %s, %s);"
postmeal.execute(INS_meal, (user_id, date, totalCal, mealName))
conn.commit()
postfood = conn.cursor()
INS_food = "INSERT INTO food_log VALUES (LAST_INSERT_ID(), %s, %s, %s, %s, %s);"
postfood.execute(INS_food, (food_id, food_name, food_cal, serving_size, serving_unit))
conn.commit()
if postfood.execute and postmeal.execute:
code['code'] = '200'
code['message'] = 'Success! INSERTED values into both food_log and user_cart'
else:
code['code'] = '400'
code['message'] = 'Error Connecting to DB. Cant insert into food_log and-or user_cart'
jsondata['code'] = code['code']
jsondata['message'] = code['message']
return(json.dumps(jsondata))
Essentially I'd like it to gather every food that comes in, and write the entire food objects in the array to the food table seamlessly.
I fixed my issue and utilized LAST_INSERT_ID() in a more structured way, as well as to properly loop through the foods incoming.
elif request.method == 'POST':
jsondata = {}
code={}
#with open('inc.json') as json_data:
#d = json.load(json_data)
#print(d)
user_id = request.json['userId']
date = request.json['date']
mealName = request.json['mealData']['mealName']
foodlist = request.json['mealData']['food']
postmeal = conn.cursor()
INS_meal = "INSERT INTO user_cart (user_id, datetime, cart_name) VALUES (%s, %s, %s);"
postmeal.execute(INS_meal, (user_id, date, mealName))
conn.commit()
postmeal.execute("select LAST_INSERT_ID();")
SEL_MEAL_ID = postmeal.fetchone()[0]
food_str = ""
for food in foodlist:
print (food)
food_str+="({}, '{}', '{}', {}, {}, '{}', {}), ".format(SEL_MEAL_ID, food['id'], food['foodname'], food['numCal'], food['servingSize'], food['servingSizeUnit'], food['totalCalories'])
postfood = conn.cursor()
INS_food = "INSERT INTO food_log VALUES {};".format(food_str[:-2]) # To exclude the last comma in the food string
postfood.execute(INS_food)
conn.commit()
if postfood.execute and postmeal.execute:
code['code'] = '200'
code['message'] = 'Success! INSERTED values into both food_log and user_cart'
else:
code['code'] = '400'
code['message'] = 'Error Connecting to DB. Cant insert into food_log and-or user_cart'
jsondata['code'] = code['code']
jsondata['message'] = code['message']
return(json.dumps(jsondata))

How to store data into database using PyMYSQL in python

I am scraping a website and getting the companies details from it, Now I trying to store the data into database. But I am getting some error like
raise InternalError(errno, errorvalue)
pymysql.err.InternalError: (1054, "Unknown column 'companyaddress' in 'field list'")
Here is my code
for d in companydetail:
lis = d.find_all('li')
companyname = lis[0].get_text().strip()
companyaddress = lis[1].get_text().strip()
companycity = lis[2].get_text().strip()
try:
companypostalcode = lis[3].get_text().strip()
companypostalcode = companypostalcode.replace(",","")
except:
companypostalcode = lis[3].get_text().strip()
try:
companywebsite = lis[4].get_text().strip()
except IndexError:
companywebsite = 'null'
print (companyname)
print (companyaddress)
print (companycity)
print (companypostalcode)
print (companywebsite)
try:
with connection.cursor() as cursor:
print ('saving to db')
cursor.execute("INSERT INTO company(companyname,address,city,pincode,website) VALUES (companyname,companyaddress,companycity,companypostalcode,companywebsite)")
connection.commit()
connection.close()
I am getting my data which I want but it I am not able to store data into database.
The result which I get while print (companyname) and print (campanyaddress) is :
NINGBO BOIGLE DIGITAL TECHNOLOGY CO.,LTD.
TIANYUAN INDUSTRIAL ZONE CIXI NINGBO
ZHEJIANGNINGBO
315325
http://www.boigle.com.cn
You cannot simply use variable names inside a query string as you do:
cursor.execute("INSERT INTO company(companyname,address,city,pincode,website) VALUES (companyname,companyaddress,companycity,companypostalcode,companywebsite)")
Instead, pass your variables into the query making it parameterized:
params = (companyname, companyaddress, companycity, companypostalcode, companywebsite)
cursor.execute("""
INSERT INTO
company
(companyname, address, city, pincode, website)
VALUES
(%s, %s, %s, %s, %s)
""", params)
In
cursor.execute("INSERT INTO company(companyname,address,city,pincode,website) VALUES (companyname,companyaddress,companycity,companypostalcode,companywebsite)")
the values in the second bracket are interpreted as table fields, rather than as python variables. Try
cursor.execute("""INSERT INTO company(
companyname,address,city,pincode,website)
VALUES (%s, %s, %s, %s, %s)""",
(companyname, companyaddress, companycity,
companypostalcode, companywebsite))
instead. You may also want to consult the docs on that.

Inserting list items into database as separate entries

I have an input field that is basically a comma delimited string (i.e. something like "deniscm, toms, peters"). That information is sent via AJAX to my Python handler SaveQueryPage. What I want to do is parse this information as a list and then insert each entry into my database. My code is as follows, but it doesn't work unfortunately. Any suggestions?
Python code:
class SaveQueryPage(webapp2.RequestHandler):
def post(self):
user = users.get_current_user()
user_nickname = user.nickname()
query_name = self.request.get('queryName')
query_collab = self.request.get('queryCollab')
query_collaborators = re.split(r'\s*[,]\s*', query_collab.strip())
query_collaborators = query_collaborators.append(user_nickname)
query_collaborators = filter(None, query_collaborators)
conn = rdbms.connect(instance=_INSTANCE_NAME, database='queryInfo')
cursor = conn.cursor()
cursor.execute('INSERT INTO queries (userNickname, queryName) VALUES (%s, %s)', (user_nickname, query_name))
conn.commit()
for item in query_collaborators:
cursor = conn.cursor()
cursor.execute('INSERT INTO collaborators (queryName, userNickname) VALUES (%s, %s)', (query_name, item))
conn.commit()
conn.close()
I finally managed to get it working. Looks like the regular expression turned the items in the list to a unicode format, which was only caught when I added some logs. I also had an error in appending a string to the list. Thanks for the pointers! The code below now works for me:
class SaveQueryPage(webapp2.RequestHandler):
def post(self):
user = users.get_current_user()
user_nickname = user.nickname()
user_email = user.email()
query_name = self.request.get('queryName')
query_description = self.request.get('queryDescription')
query_collab = self.request.get('queryCollab')
logging.info('Data read for query_collab is %s', query_collab)
query_collab_re = re.split(r'\s*[,;]\s*', query_collab.strip())
logging.info('Data read for query_collab_re is %s', query_collab_re)
query_collab_decode = []
for item in query_collab_re:
item = str(item)
query_collab_decode.append(item)
logging.info('Data read for query_collab_decode is %s', query_collab_decode)
query_collab_decode.append(user_nickname)
logging.info('Data read for query_collab_append is %s', query_collab_decode)
query_collab_filter = filter(None, query_collab_decode)
logging.info('Data read for query_collab_filter is %s', query_collab_filter)
query_value = self.request.get('queryValue') # query_value
date_created = datetime.today()
date_lastupdated = datetime.today()
active_flag = "true"
random_id = random.randint(1000000000000, 9999999999999)
unique_query_id = user_nickname + "_" + str(random_id)
conn = rdbms.connect(instance=_INSTANCE_NAME, database='userPrefs')
cursor = conn.cursor()
cursor.execute('INSERT INTO queries (userNickname, queryName, queryDescription, queryValue, dateCreated, dateLastUpdated, activeFlag, uniqueId) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)', (user_nickname, query_name, query_description, query_value, date_created, date_lastupdated, active_flag, unique_query_id))
conn.commit()
try:
for item in query_collab_filter:
cursor = conn.cursor()
cursor.execute('INSERT INTO collaborators (uniqueId, userNickname) VALUES (%s, %s)', (unique_query_id, item))
conn.commit()
except:
logging.error('There was an error inserting the values into the collaborators table. query_collaborators =' + str(query_collaborators))
conn.close()

Categories

Resources