Write the headers & query results to a CSV - Python

I am trying to save query results from PostgreSQL into a CSV file, but the file sometimes lacks the headers even though it contains all the rows from the query.
import psycopg2
import csv

try:
    conn = psycopg2.connect(database='', user='', host='', password='')
except:
    print("I am unable to connect to the database")

cursor = conn.cursor()
query = """select * from"""
cursor.execute(query)
result = cursor.fetchall()

with open("kiker.csv", "wb") as csvfile:
    writer = csv.DictWriter(csvfile,
                            fieldnames=["Builder", "Subdivision", "Spec", "Build", "Cancel", "Price",
                                        "Sq_Ft", "PPSF", "Realtor", "Project ID"],
                            extrasaction='ignore')
    writer.writeheader()
    writer.writerow(result)
print "Query 1 Created"
Error:
Traceback (most recent call last):
  File "C:\connecting.py", line 45, in <module>
    writer.writerow(result)
  File "C:\Python27\lib\csv.py", line 152, in writerow
    return self.writer.writerow(self._dict_to_list(rowdict))
  File "C:\Python27\lib\csv.py", line 149, in _dict_to_list
    return [rowdict.get(key, self.restval) for key in self.fieldnames]
AttributeError: 'list' object has no attribute 'get'
I tried both of the methods below, but neither includes the header information from PostgreSQL:
c = csv.writer(open("kiker.csv", "wb"))
for row in result:
    c.writerow(row)
and
fp = open("kiker.csv","wb")
myFile = csv.writer(fp)
myFile.writerows(result)
fp.close()
How can I fix this?

I used Pandas to get around the situation. Worked like a treat:
import pandas as pd

cursor.execute(query)
result = cursor.fetchall()
first = pd.DataFrame(result, columns=["Builder", "Subdivision", "Spec", "Build", "Cancel",
                                      "Price", "Sq_Ft", "PPSF", "Realtor", "Project ID"])
first.to_csv("kiker.csv", index=False)

DictWriter expects dicts, not tuples: https://docs.python.org/3.6/library/csv.html#writer-objects
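Since fetchall() returns a list of tuples, a plain csv.writer fits better than DictWriter here. A minimal sketch (reusing the cursor and query from the question) that takes the header straight from cursor.description:

import csv

cursor.execute(query)
result = cursor.fetchall()

with open("kiker.csv", "wb") as csvfile:
    writer = csv.writer(csvfile)
    # each entry of cursor.description is a column descriptor; index 0 is the column name
    writer.writerow([col[0] for col in cursor.description])
    writer.writerows(result)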

Related

Error while inserting data from a file into MySQL using Python

There is a file containing data that I want to insert into a table in MySQL using Python. I retrieved the data from the file using "with open" and converted it to a list, so now the elements of the list are strings, and I want to turn each string into a tuple so that I can insert the data into MySQL.
import mysql.connector

with open("/home/ninky/com.csv", "r") as fp:
    content = fp.read()
lines = content.split("\n")
print(lines)

myconn = mysql.connector.connect(host="localhost", user="root", database="EMPSALARY")
cur = myconn.cursor()
db = "insert into PERSON(name,age,year)values(%s,%s,%s)"
cur.execute(db, lines)
myconn.close()
result:
-----------
['deepak,29,2019', 'ninky,29,2010', 'suraj,29,2020', 'pratap,30,2018', '']
deepak,29,2019
ninky,29,2010
suraj,29,2020
pratap,30,2018
Traceback (most recent call last):
  File "/home/ninky/PycharmProjects/new/csv_db.py", line 23, in <module>
    cur.execute(db, lines)
  File "/usr/local/lib/python3.6/dist-packages/mysql/connector/cursor.py", line 543, in execute
    "Not all parameters were used in the SQL statement")
mysql.connector.errors.ProgrammingError: Not all parameters were used in the SQL statement

Process finished with exit code 1
In lines, what you have is a list containing each line of your CSV file. You need to iterate through the lines and split each one again on your separator (','):
import mysql.connector

with open("/home/ninky/com.csv", "r") as fp:
    content = fp.read()
lines = content.split("\n")
print(lines)

myconn = mysql.connector.connect(host="localhost", user="root", database="EMPSALARY")
cur = myconn.cursor()
db = "insert into PERSON (name,age,year) values (%s,%s,%s)"
for l in lines:
    if not l:  # skip the empty string left by the trailing newline
        continue
    data = l.split(',')
    cur.execute(db, data)
myconn.commit()  # mysql.connector does not autocommit by default
myconn.close()
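Not from the original answer, but an equivalent sketch that lets the csv module do the splitting and batches the inserts with executemany:

import csv
import mysql.connector

myconn = mysql.connector.connect(host="localhost", user="root", database="EMPSALARY")
cur = myconn.cursor()

with open("/home/ninky/com.csv", "r") as fp:
    rows = [row for row in csv.reader(fp) if row]  # csv.reader splits each line; "if row" skips blanks

cur.executemany("insert into PERSON (name,age,year) values (%s,%s,%s)", rows)
myconn.commit()
myconn.close()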

Python/JSON: Errors one after another

I am following an online tutorial for making a chatbot... here is the code for the beginning portion:
import sqlite3
import json
from datetime import datetime

timeframe = '2015-01'
sql_transaction = []

connection = sqlite3.connect('{}.db'.format(timeframe))
c = connection.cursor()

def create_table():
    c.execute("CREATE TABLE IF NOT EXISTS parent_reply(parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT, comment TEXT, subreddit TEXT, unix INT, score INT)")

def format_data(data):
    data = data.replace('\n', ' newlinechar ').replace('\r', ' newlinechar ').replace('"', "'")
    return data

def find_parent(pid):
    try:
        sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid)
        c.execute(sql)
        result = c.fetchone()
        if result != None:
            return result[0]
        else:
            return False
    except Exception as e:
        # print(str(e))
        return False

if __name__ == '__main__':
    create_table()
    row_counter = 0
    paired_rows = 0
    with open('C:/Users/oriba/Desktop/Month of Reddit/RC_2015-01'.format(timeframe.split('-')[0], timeframe), encoding='ISO-8859-1', buffering=1000) as f:
        for row in f:
            print(row)
            row_counter += 1
            row = json.load(row)
            parent_id = row['parent_id']
            body = format_data(row['body'])
            created_utc = row['created_utc']
            score = row['score']
            comment_id = row['name']
            subreddit = row['subreddit']
            parent_data = find_parent(parent_id)
When that runs, I get this error:
Traceback (most recent call last):
  File "C:/Users/oriba/Desktop/Month of Reddit/chatbot.py", line 37, in <module>
    for row in f:
  File "C:\Users\oriba\AppData\Local\Programs\Python\Python36\lib\encodings\cp1252.py", line 23, in decode
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 97: character maps to <undefined>
After searching online, I found that adding encoding='ISO-8859-1' to open() should fix it... then I get this error (the garbled line mixed into the output is what print(row) printed):
BZh91AY&SYÔAÙÕÿî»ÿÿÿúÿÿÿÿÿÿÿÿc*è` 1Ï. ñÕ ¢U±Ã$'¤;\=# ÝX9kl´ÜιKW; É# Ò PQáGF PÝ Û P :è
Traceback (most recent call last):
  File "C:/Users/oriba/Desktop/Month of Reddit/chatbot.py", line 40, in <module>
    row = json.load(row)
  File "C:\Users\oriba\AppData\Local\Programs\Python\Python36\lib\json\__init__.py", line 296, in load
    return loads(fp.read(),
AttributeError: 'str' object has no attribute 'read'
And now I'm at a loss. I know this is a lot, this is complex for me. I appreciate any help :)
json.loads() does the job.
loads() reads from a str object, while load() reads from a file object.
Your code is:
for row in f:
    ...
and row here is a str.
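A minimal sketch of the fix, keeping the question's path and encoding:

import json

with open('C:/Users/oriba/Desktop/Month of Reddit/RC_2015-01', encoding='ISO-8859-1', buffering=1000) as f:
    for row in f:
        # each row is a str, so parse it with loads(); load() expects a file object
        data = json.loads(row)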

Invalid Argument error in Python code

I am a beginner with Python, and I have this code to decompress a log file, but I get an Invalid argument error. I don't know why; my guess is that the log file is too big, since the file I am scanning is about 2 GB, but I have no idea how to fix the error. Please help, thank you. Below is my code with the error:
import glob
import gzip
import os
import pymysql
import logging

# path to gz directory
GZ_DIR = '/Users/kiya/Desktop/mysql/csv'

# Database information
DB_HOST = 'localhost'
DB_USER = 'dbuser'
DB_PASS = 'dbPassword'
DB_NAME = 'dbname'
LOGFILE = "exception.log"

def csv_reader(file, header=False):
    import csv
    with open(file, "r") as f:
        reader = csv.reader(f)
        if header:
            next(reader)
        for row in reader:
            yield row

def import_sql(filename, dbHostName, dbUser, dbPassword, databaseName):
    db = pymysql.connect(host=dbHostName, user=dbUser, password=dbPassword,
                         db=databaseName, charset='utf8')
    for row in csv_reader(filename, False):
        # prepare a cursor object using cursor() method
        with db.cursor() as cursor:
            if row[3] == "THREAT" and row[4] == "url":
                sql = ("INSERT INTO PADIAGDB.url ("
                       "Domain,Receive_Time,Serial,Type,Threat_Content_Type,"
                       "Config_Version,Generate_Time,Source_address,Destination_address,"
                       "NAT_Source_IP,NAT_Destination_IP,Rule,Source_User,"
                       "Destination_User,Application,Virtual_System,Source_Zone,"
                       "Destination_Zone,Inbound_Interface,Outbound_Interface,Log_Action,"
                       "Time_Logged,Session_ID,Repeat_Count,Source_Port,Destination_Port,"
                       "NAT_Source_Port,NAT_Destination_Port,Flags,IP_Protocol,Action,"
                       "URL_Filename,Threat_Content_Name,Category,Severity,Direction,"
                       "Sequence_Number,Action_Flags,Source_Country,Destination_Country,"
                       "cpadding,contenttype,pcap_id,filedigest,cloud,url_idx,user_agent,"
                       "filetype,xff,referer,sender,subject,recipient,reportid,"
                       "dg_hier_level_1,dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,"
                       "Virtual_System_Name,Device_Name,file_url )"
                       " VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
                       "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
                       "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
                       ");")
            elif row[3] == "SYSTEM":
                sql = ("INSERT INTO PADIAGDB.system ("
                       "Domain,Receive_Time,Serial,Type,Threat_Content_Type,Config_Version,"
                       "Generate_Time,Virtual_System,Event_ID,Object,fmt,id,module,Severity,"
                       "Description,Sequence_Number,Action_Flags,dg_hier_level_1,"
                       "dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,Virtual_System_Name,"
                       "Device_Name )"
                       " VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
                       "%s,%s,%s );")
            else:
                continue

            try:
                cursor.execute('SET foreign_key_checks = 0')
                # Execute the SQL command
                r = cursor.execute(sql, row)
                # Commit your changes in the database
                cursor.execute('SET foreign_key_checks = 1')
                db.commit()
            except Exception as e:
                logging.exception(e)
                db.rollback()
    # disconnect from server
    db.close()

gz_files = (gz for gz in glob.glob(os.path.join(GZ_DIR, '*.gz')))
for gz_file in gz_files:
    with gzip.open(gz_file, 'rb') as in_file:
        s = in_file.read()
    sql_file = gz_file[:-3]
    sql_file = sql_file[:-4] + '.csv'
    with open(sql_file, 'wb') as out_file:
        out_file.write(s)
    import_sql(sql_file, DB_HOST, DB_USER, DB_PASS, DB_NAME)
    os.remove(sql_file)
This is the error I got:
Traceback (most recent call last):
  File "/Users/kiya/Desktop/mysql/csv/sql3.py", line 180, in <module>
    out_file.write(s)
OSError: [Errno 22] Invalid argument
For reading big files, you will have to read and write in chunks; try something like this (draft):
fr = open(input_file, 'rb')
fw = open(output_file, 'wb')
while True:
    chunk = fr.read(1024)
    if not chunk:
        break
    fw.write(chunk)
fr.close()
fw.close()
You can of course use context managers (the with statement) instead.
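For example, a minimal sketch of the same idea applied to the question's gzip loop (this uses shutil.copyfileobj, which does the chunked copy for you):

import gzip
import shutil

# stream-decompress in chunks instead of reading the whole 2 GB file into memory
with gzip.open(gz_file, 'rb') as in_file, open(sql_file, 'wb') as out_file:
    shutil.copyfileobj(in_file, out_file, length=1024 * 1024)  # 1 MiB chunks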

Python cPickle from a db field TypeError

In my project I save a list as a cPickle object in a bytea field of my PostgreSQL db, like this:
my_list = list(self.outhtml)
bitRst = cPickle.dumps(my_list)
cur.execute("INSERT INTO frontend_t_test (test_id, test_data, test_creation, ip_addr) VALUES (%s, %s, %s, %s)",
            (str(id(self)), psycopg2.Binary(bitRst), dt, test_utils.return_ip()))
So far so good; but when I try to retrieve my list I do:
cur = myconn.cursor()
cur.execute("""SELECT test_data FROM frontend_t_test WHERE test_id = %s""", (idTest,))
res = cur.fetchall()
with open(res, 'rb') as f:
    mynewlist = cPickle.load(f)
and instead of the original list I get this error:
Traceback (most recent call last):
  File "", line 1, in
    with open(res, 'rb') as f:
TypeError: coercing to Unicode: need string or buffer, list found
How can I retrieve my pickled list from the db?
Thanks in advance
The pickle.load method expects to get a file object, but you are providing a list (res) instead. You need to write the result of cur.fetchall to a file and pass that in, or use pickle.dump to write the list:
with open('outfile', 'wb') as f:
    pickle.dump(res, f)
and for reading:
with open('outfile', 'rb') as f:
    mynewlist = pickle.load(f)
You don't need to "open" data returned from cursor. Just use the data and call loads.
cur.execute("""SELECT test_data FROM frontend_t_test WHERE test_id = %s""",(idTest,))
res = cur.fetchone()
data = str(res[0]) # buffer => str
mynewlist = cPickle.loads(data)
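Putting both halves together, a minimal round-trip sketch (assuming an open psycopg2 connection and cursor; the column list is trimmed to the two relevant fields, so adapt it to the real table's constraints):

import cPickle
import psycopg2

# store: pickle the list and wrap it so psycopg2 sends it as bytea
payload = psycopg2.Binary(cPickle.dumps(my_list))
cur.execute("INSERT INTO frontend_t_test (test_id, test_data) VALUES (%s, %s)",
            (idTest, payload))

# retrieve: the bytea column comes back as a buffer, not a file object
cur.execute("SELECT test_data FROM frontend_t_test WHERE test_id = %s", (idTest,))
mynewlist = cPickle.loads(str(cur.fetchone()[0]))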

PyMarc Invalid Literal Error

I'm trying to parse a MARC file downloaded from the Library of Congress. I've successfully downloaded the record using PyZ3950, but when I try to parse the file using PyMarc, I get the following error:
Traceback (most recent call last):
  File "test.py", line 13, in <module>
    for record in reader:
  File "build/bdist.macosx-10.9-intel/egg/pymarc/reader.py", line 83, in next
ValueError: invalid literal for int() with base 10: '<PyZ3'
And here is my full code:
from PyZ3950 import zoom, zmarc
from pymarc import MARCReader

conn = zoom.Connection('z3950.loc.gov', 7090)
conn.databaseName = 'VOYAGER'
conn.preferredRecordSyntax = 'USMARC'

query = zoom.Query('CCL', 'ti="1066 and all that"')
res = conn.search(query)
reader = MARCReader(str(res))
for record in reader:
    print record.title()
conn.close()
Your statement:
res = conn.search(query)
returns a ResultSet, according to http://www.panix.com/~asl2/software/PyZ3950/zoom.html
Each record r in the ResultSet has its data in r.data.
So you have to feed MARCReader with each r.data, or with all of them concatenated.
This will work:
from PyZ3950 import zoom, zmarc
from pymarc import MARCReader

conn = zoom.Connection('z3950.loc.gov', 7090)
conn.databaseName = 'VOYAGER'
conn.preferredRecordSyntax = 'USMARC'

query = zoom.Query('CCL', 'ti="1066 and all that"')
res = conn.search(query)

marc = ''
for r in res:
    marc = marc + r.data

reader = MARCReader(marc)
for record in reader:
    print record.title()
conn.close()
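As a minor style point, the concatenation loop can be written as a single join, which avoids rebuilding the string on every iteration (same behavior, just more idiomatic):

marc = ''.join(r.data for r in res)
reader = MARCReader(marc)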
