Invalid Argument error on the python code - python
I am beginner to python, and I have this code to decompress log file, but I have the error Invalid argument. I don't know why I got this error, in my opinion, I think it is because the log file is too big, cause I am scanning a log file which is 2gb file. But I have no idea how to fix the error. Please help, thank you. And below is my code with the error:
import glob
import gzip
import os
import pymysql
import logging
# path to gz directory
GZ_DIR = '/Users/kiya/Desktop/mysql/csv'
# Database Infomation
DB_HOST='locahost'
DB_USER='dbuser'
DB_PASS='dbPassword'
DB_NAME='dbname'
LOGFILE="exception.log"
def csv_reader(file, header=False):
import csv
with open(file, "r") as f:
reader = csv.reader(f)
if header:
next(reader)
for row in reader:
yield row
def import_sql(filename, dbHostName, dbUser, dbPassword, databaseName):
db = pymysql.connect(host=dbHostName,
user=dbUser,
password=dbPassword,
db=databaseName,
charset='utf8')
for row in csv_reader(filename, False):
# prepare a cursor object using cursor() method
with db.cursor() as cursor:
if row[3] == "THREAT" and row[4] == "url":
sql = ("INSERT INTO PADIAGDB.url ("
"Domain,Receive_Time,Serial,Type,Threat_Content_Type,"
"Config_Version,Generate_Time,Source_address,Destination_address,"
"NAT_Source_IP,NAT_Destination_IP,Rule,Source_User,"
"Destination_User,Application,Virtual_System,Source_Zone,"
"Destination_Zone,Inbound_Interface,Outbound_Interface,Log_Action,"
"Time_Logged,Session_ID,Repeat_Count,Source_Port,Destination_Port,"
"NAT_Source_Port,NAT_Destination_Port,Flags,IP_Protocol,Action,"
"URL_Filename,Threat_Content_Name,Category,Severity,Direction,"
"Sequence_Number,Action_Flags,Source_Country,Destination_Country,"
"cpadding,contenttype,pcap_id,filedigest,cloud,url_idx,user_agent,"
"filetype,xff,referer,sender,subject,recipient,reportid,"
"dg_hier_level_1,dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,"
"Virtual_System_Name,Device_Name,file_url )"
""
"VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
"%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
"%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
"); ")
elif row[3] == "SYSTEM":
sql = ("INSERT INTO PADIAGDB.system ("
"Domain,Receive_Time,Serial,Type,Threat_Content_Type,Config_Version,"
"Generate_Time,Virtual_System,Event_ID,Object,fmt,id,module,Severity,"
"Description,Sequence_Number,Action_Flags,dg_hier_level_1,"
"dg_hier_level_2,dg_hier_level_3,dg_hier_level_4,Virtual_System_Name,"
"Device_Name )"
""
"VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,"
"%s,%s,%s );")
else:
continue
try:
cursor.execute('SET foreign_key_checks = 0')
# Execute the SQL command
r = cursor.execute(sql, row)
#Commit your changes in the database
cursor.execute('SET foreign_key_checks = 1')
db.commit()
except Exception as e:
logging.exception(e)
db.rollback()
# disconnect from server
db.close()
gz_files = (gz for gz in glob.glob(os.path.join(GZ_DIR, '*.gz')))
for gz_file in gz_files:
with gzip.open(gz_file, 'rb') as in_file:
s = in_file.read()
sql_file = gz_file[:-3]
sql_file = sql_file[:-4] + '.csv'
with open(sql_file, 'wb') as out_file:
out_file.write(s)
import_sql(out_file, DB_HOST, DB_USER, DB_PASS, DB_NAME)
os.remove(sql_file)
This is the error I got:
Traceback (most recent call last):
File "/Users/kiya/Desktop/mysql/csv/sql3.py", line 180, in <module>
out_file.write(s)
OSError: [Errno 22] Invalid argument
for reading big files, you will have to read and write in chucks, try smth like (draft)
fr = open(input_file, 'rb')
fw = open(output_file, 'wb')
while True:
chunk = fr.read(1024)
if not chunk:
break
fw.write(chunk)
fr.close()
fw.close()
you can use context mangers of course aka with
Related
Unexpected unindent python to execute an sql query
I have this python code to execute an sql file: import mysql.connector cnx = mysql.connector.connect(user='skynetadmin', password='Skynetadmin1', host='silmysskytest01.sing.micron.com', database='skynet_msa') cursor =cnx.cursor() def executeScriptsFromFile(filename): fd = open(filename, 'r') sqlFile = fd.read() fd.close() sqlCommands = sqlFile.split(';') for command in sqlCommands: try: if command.strip() != '': cursor.execute(command) executeScriptsFromFile('C:\Users\gsumarlin\Documents\dumps\Dump20220428\Query_for_testingintern.sql') cnx.commit() Hwv, I have this error: executeScriptsFromFile('C:\Users\gsumarlin\Documents\dumps\Dump20220428') ^ unexpected unindent I tried moving ard with the indents but it gives me no solution. Any help is appreciated!
import mysql.connector def executeScriptsFromFile(Query_for_testingintern): fd = open(Query_for_testingintern, 'r') sqlFile = fd.read() fd.close() sqlCommands = sqlFile.split(';') for command in sqlCommands: try: if command.strip() != '': cursor.execute(command) except: pass cnx = mysql.connector.connect(user='skynetadmin',password='Skynetadmin1',host='silmysskytest01.sing.micron.com',database='skynet_msa') cursor =cnx.cursor() executeScriptsFromFile("C:\\Users\gsumarlin\Documents\dumps\Dump20220428") cnx.commit()
Do you use Notepad++ or something like that? You can simply open the file there and select all (Ctrl + A) Then click on "Edit" -> Non printable characters -> Convert Tabs to Spaces
read oracle sql file using python and fetch results in to CSV file
I am beginner to python and want to read a sql file from python and fetch those results in to CSV file. I have tried usin CX_oracle to connect to oracle database. It works when i directly give the sql query in the code. But, I am not sure how to read a sql file from python. Here is the code which worked when i gave sql query directly in the code: import csv import cx_Oracle con = cx_Oracle.connect('XCM/XCM#home.com:1500/HOME') cursor = con.cursor() csv_file = open("exp.csv", "w") writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE) r = cursor.execute("select * from home_parties where CREATION_DATE >= trunc(sysdate)") for row in cursor: writer.writerow(row) cursor.close() con.close() csv_file.close() I tried the below code to fetch from sql file which has the same sql query as mentioned in the above code - sample.sql but it dint work import csv import cx_Oracle con = cx_Oracle.connect('XCM/XCM#home.com:1500/HOME') cursor = con.cursor() csv_file = open("exp.csv", "w") writer = csv.writer(csv_file, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE) f = open('C:/Users/home1/sample.sql') full_sql = f.read() r = cursor.execute(full_sql) for row in cursor: writer.writerow(row) cursor.close() con.close() csv_file.close() Kindly help me out!!
The content of the sql file might be read within a for loop line by line such as import csv import cx_Oracle con = cx_Oracle.connect('XCM/XCM#home.com:1500/HOME') cursor = con.cursor() sql="" with open("C:/Users/home1/sample.sql") as f_in: for line in f_in: sql += line f_out = open("exp.csv", "w") writer = csv.writer(f_out, delimiter='|', lineterminator="\n", quoting=csv.QUOTE_NONE) cursor.execute(sql) for row in cursor: writer.writerow(row) cursor.close() con.close() f_out.close() f_in.close() considering the SQL statement to be spreaded across multiple lines
Assuming you have a sample.sql file in the same directory with the query as the first line. This would read the SQL query from that file, then execute the query and save the result into a CSV file named "exp.csv". import cx_Oracle con = cx_Oracle.connect('XCM/XCM#home.com:1500/HOME') cursor = con.cursor() import pandas as pd with open('sample.sql') as f: sql_query_string = f.readline(); sql_query = pd.read_sql_query(sql_query_string, con) sql_query.to_csv("exp.csv", sep='|')
Python PostgreSQL "error insert has more target columns than expressions" but its not
Im new to SQL and PostgreSQL and I cant understand whats going on with this code. Im trying to insert csv to postgres with this code: import csv import psycopg2 as pg filename = 'myfile.csv' try: conn = pg.connect(user="myuser", password="mypass", host="myhost", port="5432", database="mydb") cursor = conn.cursor() with open(filename, 'r') as f: reader = csv.reader(f) next(reader) # This skips the 1st row which is the header. for record in reader: print(record) cursor.execute("""INSERT INTO auth VALUES (%s, %s, %s, %s)""", record) conn.commit() except (Exception, pg.Error) as e: print(e) finally: if (conn): cursor.close() conn.close() print("Connection closed.") but it raise error insert has more target columns than expressions LINE 1: ...00000000-0000-0000-0000-000000000000', '1580463062', 'auto') but here is what i want to insert ['00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000', '1580463062', 'auto'] and its look like its definitely has exactly 4 colums I also tried to change the encoding of csv from ASCII to UTF-8 and UTF-8_SIG but i still get this error
I solve my problem with this code import psycopg2 conn = psycopg2.connect("host=host dbname=dbname user=user password=password") cur = conn.cursor() with open(filename, 'r') as f: next(f) cur.copy_from(f, 'auth', sep=',') conn.commit() cur.close() conn.close()
pydev to postgres issue with code
I am trying to send data from python using pydev to postgresql. I am finding more than one error and could really need help. Many thanks! One of the error says: psycopg2.InterfaceError: cursor already closed. Any help would be appreciated! Here is my code to create the table #!/usr/bin/python # -*- coding: utf-8 -*- import psycopg2 import sys import csv from itertools import count path = r'C:\Users\sammy\Downloads\E0.csv' with open(path, "r") as csvfile: readCSV = csv.reader(csvfile, delimiter=",") firstline = 1 con = None con = psycopg2.connect("host='localhost' dbname='football' user='postgres' password='XXX'") cur = con.cursor() con.commit() cur.execute("DROP TABLE testtest1234") cur.execute("CREATE TABLE testtest1234 (HY INTEGER)") for row in readCSV: if firstline: firstline=0 continue new_data = row[19] try: cur.execute("INSERT INTO testtest1234 values ("+new_data+")") cur.execute("SELECT * FROM testtest1234;") except psycopg2.DatabaseError as e: if con: con.rollback() print ("Error %s % e", e) sys.exit(1) finally: if con: con.close() print (new_data) print(" ".join(row)) out=open("new_data.csv", "w") output = csv.writer(out) for row in new_data: output.writerow(row) out.close() Here is my code to insert in the table #!/usr/bin/python # -*- coding: utf-8 -*- import psycopg2 import sys import csv from itertools import count path = r'C:\Users\sammy\Downloads\E0.csv' with open(path, "r") as csvfile: readCSV = csv.reader(csvfile, delimiter=",") con = None con = psycopg2.connect("host='localhost' dbname='football' user='postgres' password='XXX'") cur = con.cursor() con.commit() for row in readCSV: new_data = row[19] print (new_data) try: cur.execute("INSERT INTO testtest1234 values ("+new_data+")") except psycopg2.DatabaseError as e: if con: con.rollback() print ("Error %s % e", e) sys.exit(1) finally: if con: con.close() print(" ".join(row)) out=open("new_data.csv", "w") output = csv.writer(out) for row in new_data: output.writerow(row) out.close()
You are closing the connection inside the loop, so only the first one row is inserted. for row in readCSV: try: ... except psycopg2.DatabaseError as e: ... finally: if con: con.close()
Write the headers & query to the csv
I am trying to save the query results from postgresql into a csv file but the csv file sometimes lacks the headers but writes all the details of the queries. import psycopg2 import csv try: conn = psycopg2.connect(database = '', user = '', host = '', password = '') except: print ("I am unable to connect to the database") cursor = conn.cursor() query = """select * from""" cursor.execute(query) result = cursor.fetchall() with open("kiker.csv","wb") as csvfile: writer = csv.DictWriter(csvfile, fieldnames = ["Builder", "Subdivision", "Spec", "Build", "Cancel", "Price", "Sq_Ft", "PPSF", "Realtor", "Project ID"], extrasaction = 'ignore') writer.writeheader() writer.writerow(result) print "Query 1 Created" Error: Traceback (most recent call last): File "C:\\connecting.py", line 45, in <module> writer.writerow(result) File "C:\Python27\lib\csv.py", line 152, in writerow return self.writer.writerow(self._dict_to_list(rowdict)) File "C:\Python27\lib\csv.py", line 149, in _dict_to_list return [rowdict.get(key, self.restval) for key in self.fieldnames] AttributeError: 'list' object has no attribute 'get' I tried both the methods below, but both of them fail to include the header information from postgresql. c = csv.writer(open("kiker.csv","wb")) for row in result: c.writerow(row) and fp = open("kiker.csv","wb") myFile = csv.writer(fp) myFile.writerows(result) fp.close() How can I fix this?
I used Pandas to get around the situation. Worked like a treat. cursor.execute(query) result = cursor.fetchall() first = pd.DataFrame(result, columns = ["Builder","Subdivision","Spec","Build","Cancel","Price","Sq_Ft","PPSF","Realtor","Project ID"]) first.to_csv("kiker.csv",index = False)
DictWriter expects dicts, not tuples: https://docs.python.org/3.6/library/csv.html#writer-objects