MySQL inserts stop from Python - python

I am trying to pull records from the a database, do some processing on one of the fields, then insert the data back into the database. However, I notice that the script stops after one successful insert, even though there are many more records beyond what is inserted into the database.
cur = db.cursor()
# Pull records from the database, process each one, and store back into the database
SQLQuery = """SELECT * FROM cybersecurity4.hackhoundposts"""
cur.execute(SQLQuery)
for row in cur:
postID = row[0]
threadID = row[1]
authorID = row[2]
url = row[3]
subforum = row[4]
threadTitle = row[5]
authorName = row[6]
postDateTime = row[7]
postSequence = row[8]
flatcontent = row[9]
userTitle = row[11]
hasAttachment = row[12]
print (postID)
# #process the flatcontent
flatcontent = rmvSpecialCharsandCamelCase(flatcontent)
# insert into the database
try:
string = """INSERT INTO cybersecurity4.cleanedhackhoundposts
(postid, threadid, authorid, URL, subforum, threadTitle, authorName, postdatetime, postSequence, flatcontent, usertitle, hasattachment)
VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s');""" % \
(postID ,threadID,authorID,url,subforum,threadTitle,authorName,postDateTime,postSequence,flatcontent,userTitle,hasAttachment)
print (string)
cur.execute(string)
print ("Successfully inserted post " + str(postID))
except:
int("Failed to insert post" + str(postID))

As i understand your code, you have nothing repeating the insertion, so you are retrieving N records, but only inserting 1 record. Some sudo code of what you need to achieve is.
While(cursor hasnext):
select row
manipulate
insert
You are close to the result, but missing the last steps.
Edit:
You can not insert ids on the same id as you select you must let the db handle the id or simply update the row. So removing id from the insert query would probably fix the problem. Next time also check the database log it tells alot.

Related

Insert data from csv to postgreSQL database via Python

I'm brand new to postgreSQL or SQL at all.
I'm trying to create a table in a database via Python and then load data from a .csv file into the table.
My code looks like this:
import csv
import psycopg2
#Establish connection to database
con = psycopg2.connect(
host = "localhost",
database = "kundeavgang",
user = "postgres",
password = "postgres",
)
#Cursor
cur = con.cursor()
#If a mistake is made, start from scratch
cur.execute("DROP TABLE IF EXISTS kundeavgang")
#Create table
cur.execute('''
CREATE TABLE "kundeavgang"(
"customerID" TEXT,
"gender" TEXT,
"SeniorCitizen" TEXT,
"Partner" TEXT,
"Dependents" TEXT,
"tenure" INT,
"PhoneService" TEXT,
"MultipleLines" TEXT,
"InternetService" TEXT,
"OnlineSecurity" TEXT,
"DeviceProtection" TEXT,
"TechSupport" TEXT,
"StreamingMovies" TEXT,
"Contract" TEXT,
"PaperlessBilling" TEXT,
"PaymentMethod" TEXT,
"MonthlyCharges" FLOAT,
"TotalCharges" FLOAT,
"Churn" TEXT
)
''')
#Acsess .csv file
with open('kundeavgang.csv') as csvFile:
reader = csv.reader(csvFile)
skipHeader = next(reader) #Account for header
for row in reader:
customerID = row[0]
gender = row[1]
SeniorCitizen = row[2]
Partner = row[3]
Dependents = row[4]
tenure = row[5]
PhoneService = row[6]
MultipleLines = row[7]
InternetService = row[8]
OnlineSecurity = row[9]
OnlineBackup = row[10]
DeviceProtection = row[11]
TechSupport = row[12]
StreamingTV = [13]
StreamingMovies = row[14]
Contract = row[15]
PaperlessBilling = row[16]
PaymentMethod = row[17]
MonthlyCharges = row[18]
TotalCharges = row[19]
Churn = row[20]
cur.execute('''INSERT INTO kundeavgang(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn))
#Commit the transaction
con.commit()
#End connection
con.close()
In pgAdmin, the table comes up as existing in the database. However, I cannot find the actual table. Further, I have no idea about this line of code:
cur.execute('''INSERT INTO kundeavgang(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn))
What does all the %s stand for? I found it off an online example which was not very helpful, so I tried it without knowing what it means. I have seen some examples where question marks are inserted instead, but also this without explanation.
Lastly, as the code stands now, I get the error message:
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn))
IndexError: tuple index out of range
All help or explanations will be appreciated.
For bulk inserts from text files, consider copy_from or copy_expert of psycopg2. Also, be sure to commit your execution:
cur.execute("DROP TABLE IF EXISTS kundeavgang")
con.commit()
cur.execute('''CREATE TABLE "kundeavgang" ... ''')
con.commit()
with open('kundeavgang.csv') as csvFile:
next(csvFile) # SKIP HEADERS
cur.copy_from(csvFile, "kundeavgang", sep=",")
# POSTGRES COPY COMMAND FOR CSV MODE
# cur.copy_expert("""COPY "kundeavgang" FROM STDIN WITH CSV""", csvFile)
con.commit()
The %s are placeholders for the values that will be inserted and passed through the following tuple:
(customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,TotalCharges,Churn)
The problem that your insert statement going to insert to 20 columns, you provide 20 values in your tuple but you have 22 placeholders (%s).
The problem is a mismatch between the number of columns to be populated and the length of the list provided. This is an easy mistake to make when dealing with a lot of columns. One way to reduce risk of error is to use the length of the columns or values list to build the statement.
cols = [name1, name2,...]
vals = [val1, val2, ...]
assert len(cols) == len(vals), 'mismatch between number of columns and number of values'
template = """INSERT INTO tbl ({}) VALUES ({})"""
stmt = template.format(', '.join(cols), ','.join(['%s'] * len(vals)))
cur.execute(stmt, vals)
Note that when building the column names dynamically it's good practice to quote them - psycopg2 provides tools for this.
Change the line reader = csv.reader(csvFile) to:
reader = csv.reader(csvFile, delimiter=';')

MySQL Insert / update from python, data from excel spreadsheet

Upsert to MySQL using python and data from excel.
Im working on populating a MySQL DB, using python.
The data is stored on excel sheets.
Because the DB is suppossed to be used for monitoring "projects", there's a posibility for repeated pk, so in that case it need to be updated instead of insert, because a project can have many stages.
Also, there's a value to be inserted in the DB table, that can't be added from the spreadsheet. So i'm wondering if in that case, the insert of this value, most be done using a separated query for it or if theres a way to insert it in the same query. The value is the supplier ID and needs to be inserted between id_ops and cif_store.
And to finish, I need to perform an inner join, to import the store_id using the store_cif, from another table called store. I know how do it, but im wondering if it also must be executed from a sepparated query or can be performed at the sameone.
So far, i have done this.
import xlrd
import MySQLdb
def insert():
book = xlrd.open_workbook(r"C:\Users\DevEnviroment\Desktop\OPERACIONES.xlsx")
sheet = book.sheet_by_name("Sheet1")
database = MySQLdb.connect (host="localhost", user = "pytest", passwd = "password", db = "opstest1")
cursor = database.cursor()
query = """INSERT INTO operation (id_ops, cif_store, date, client,
time_resp, id_area_service) VALUES (%s, %s, %s, %s, %s, %s)"""
for r in range(1, sheet.nrows):
id_ops = sheet.cell(r,0).value
cif_store = sheet.cell(r,1).value
date = sheet.cell(r,2).value
client = sheet.cell(r,3).value
time_resp = sheet.cell(r,4).value
id_area_service = sheet.cell(r,5).value
values = (id_ops, cif_store, date, client, time_resp, id_area_service)
cursor.execute(query, values)
# Close the cursor
cursor.close()
# Commit the transaction
database.commit()
# Close the database connection
database.close()
# Print results
print ("")
print ("")
columns = str(sheet.ncols)
rows = str(sheet.nrows)
print ("Imported", columns,"columns and", rows, "rows. All Done!")
insert()
What you are looking for is INSERT ... ON DUPLICATE KEY UPDATE ...
Take a look here https://dev.mysql.com/doc/refman/8.0/en/insert-on-duplicate.html
Regarding the extraneous data, if its a static value for all rows you can just hard code it right into the INSERT query. If it's dynamic you'll have to write some additional logic.
For example:
query = """INSERT INTO operation (id_ops, hard_coded_value, cif_store, date, client,
time_resp, id_area_service) VALUES (%s, "my hard coded value", %s, %s, %s, %s, %s)"""

Python/MySQL - Error 1064, can't figure it out

I've been trying to find out what causes the error. I believe it is in the last query to the database. I've marked it with comments.
This error has been giving me a headache for the past 30 minutes.
import MySQLdb
import time
# Create a database connection
db = MySQLdb.connect(host="******", user="******", passwd="*****", db="*****")
cur = db.cursor()
# Create a query to select all IDs
cur.execute("SELECT id FROM users")
clientArray = []
# Loop over all IDs returned from query,
# save all IDs in the clientArray
for row in cur.fetchall():
clientID = str(row[0])
clientArray.append(clientID)
clientIDInput = ""
while True:
# Check and wait for input
clientIDInput = raw_input("")
if clientIDInput in clientArray:
# Check to see whether user is already signed in to the device
cur.execute("SELECT fitnessStatus FROM users WHERE id=%s", (clientIDInput))
data = cur.fetchone()
if data[0] == False:
cur.execute("UPDATE users SET fitnessStatus='1' WHERE id=%s", (clientIDInput))
checkInTime = time.strftime('%Y-%m-%d %H:%M:%S')
checkOutID = raw_input("")
if checkOutID == clientIDInput:
cur.execute("UPDATE users SET fitnessStatus='0' WHERE id=%s", (clientIDInput))
checkOutTime = time.strftime('%Y-%m-%d %H:%M:%S')
print checkInTime
print checkOutTime
### I BELIEVE THIS IS THE CAUSE OF THE ERROR ###
cur.execute("INSERT INTO activities (id, machinename, checkin, checkout, clientid) VALUES (NULL, Cross Trainer #5, %s, %s, %s)", (checkInTime, checkOutTime, clientIDInput))
# Send checkInTime and checkOutTime to database
There is a syntax error in your INSERT statement. Try to enclose the string 'Cross Trainer #5' in single quotes:
cur.execute("INSERT INTO activities (id, machinename, checkin, checkout, clientid) VALUES (NULL, 'Cross Trainer #5', %s, %s, %s)", (checkInTime, checkOutTime, clientIDInput))`
Luckily, the statement itself is already enclosed in double quotes " so that no further change would be required :)
The error 1064 is a bit misleading. It indicates, amongst others, abuse of a reserved word. And indeed: CROSS is a reserved word.

Python and sqlite- load tweets

Working/learning on loading a set of tweets into a sqlite db in Python 2.7 to then do some queries. But can't seem to get the data to read in.
I can create the db, create the table, but when I go to read the tweets, parse and load- it (seems) it is not populating the table.
Of the many possible fields in each tweet, I am only trying to capture six.
My code is:
#create database works fine
import sqlite3
conn = sqlite3.connect('twitter.db')
c = conn.cursor()
import urllib2
tweets=urllib2.urlopen("http://rasinsrv07.cstcis.cti.depaul.edu/CSC455/assignment4.txt")
tweets.readline()
#create table: works fine
c.execute("CREATE TABLE Tweet(created_at, id, text, source, in_reply_to_user_ID,retweet_Count)")
#Loads variables & data in table: Not loading. I think this is my problem here.
for elt in tweets:
currentRow = elt[:-1].split(", ")
insert = """insert into Tweet values ('%s', '%s', '%s', '%s', '%s', '%s')""" % ("created_at", "id", "text", 'source', 'in_reply_to_user_ID', 'retweet_Count')
print insert
conn.commit()
#tried to see the table
read_tweets = """SELECT * from Tweet """
c.fetchall()
#tried again to see the table. Must be no data in the table
read_tweets = """SELECT * from Tweet """
rows = c.fetchall()
for row in rows:
print row

Using INSERT with a PostgreSQL Database using Python

I am trying to insert data into a PostgreSQL database table using Python. I don't see any syntax errors but, for some reason, my data isn't getting inserted into the database.
conn = psycopg2.connect(connection)
cursor = conn.cursor()
items = pickle.load(open(pickle_file,"rb"))
for item in items:
city = item[0]
price = item[1]
info = item[2]
query = "INSERT INTO items (info, city, price) VALUES (%s, %s, %s);"
data = (info, city, price)
cursor.execute(query, data)
You have to commit the transaction.
conn.commit()
If there's no reason to think the transaction will fail, it's faster to commit after the for loop finishes.

Categories

Resources