Why does my pandas dataframe have two headers? - python

def get_all_rows(conn):
    """Print every row of the ``Outofcountry`` table.

    Runs ``SELECT * FROM Outofcountry`` on *conn* through
    ``pd.read_sql_query`` and prints the resulting DataFrame.

    Args:
        conn: Open database connection passed to ``pd.read_sql_query``.

    Returns:
        None. The DataFrame is only printed, not returned.
    """
    df6 = pd.read_sql_query("SELECT * FROM Outofcountry", conn)
    print(df6)
    return
ComputerName ConnectTime lastExtIP latestCountry latestRegion latestCity Name CurrentLogonUser LastLogonUser PrimaryUser UserName
0 ComputerName ConnectTime_decimal lastExtIP latestCountry latestRegion latestCity Name CurrentLogonUser LastLogonUser PrimaryUser UserName\n
I'd like to be able to get rid of the 2nd row...
this is the code block:
def get_all_rows(conn):
    """Print every row of the ``Outofcountry`` table.

    Runs ``SELECT * FROM Outofcountry`` on *conn* through
    ``pd.read_sql_query`` and prints the resulting DataFrame.

    Args:
        conn: Open database connection passed to ``pd.read_sql_query``.

    Returns:
        None. The DataFrame is only printed, not returned.
    """
    df6 = pd.read_sql_query("SELECT * FROM Outofcountry", conn)
    print(df6)
    return
d1 = pd.read_csv("CS_Out_Of_Country.csv", mangle_dupe_cols='True', encoding='windows-1252')
I tried adding this, both with 'True' and with False, but it doesn't do anything.
I would like the output to have just one header line:
ComputerName ConnectTime lastExtIP latestCountry latestRegion latestCity Name CurrentLogonUser LastLogonUser PrimaryUser UserName

It seems like the header appears two times in your SQL table. You could just do:
df6.drop([0], inplace=True)

Related

How to pass the date parameter in pyspark query using Jupyter notebook?

I want to pass the date parameter into the query below in a Jupyter notebook, but it is not working the way it is written below. I don't know where the problem lies.
# Date value that is meant to be substituted into the query below.
filedate = '2022-11-15'
# NOTE: this is a plain (non-f) triple-quoted string, so "{filedate}" stays in
# the query as literal text; the variable above is never substituted.
query = """(select * from db.xyz
where name = 'Tom'
and login = '{filedate}') as salary"""
# Load the parenthesised subquery through Spark's "jdbc" format, passing it as
# the "dbtable" option.
df = spark.read.format("jdbc")\
.option("url", jdbc_url)\
.option("driver",jdbc_driver)\
.option("dbtable" ,query).load()
You are missing an f-string:
filedate = '2022-11-15'
# The f prefix makes Python replace {filedate} with the variable's value when
# the string is built.
# NOTE(review): the value is spliced directly into the SQL text; only do this
# with trusted values.
query = f"""(select * from db.xyz
where name = 'Tom'
and login = '{filedate}') as salary"""

Decryption not working - how to get raw data from csv/pandas - python

Below is my code for decrypting from a csv file stored on DropBox. I get the user to type in their ID, I match this with a database containing hashed values, and then I use the ID typed in to search my stored csv file for the matching row. I then place all the row values into my decryption function.
Also, I'm aware my variable names/formatting are awful; I'm just using this code as a prototype right now.
My results are being printed as such:
b'b\xebS\x1b\xc8v\xe2\xf8\xa2\\x84\x0e7M~\x1b'
b'\x01B#6i\x1b\xfc]\xc3\xca{\xd5{B\xbe!'
b'in*V7\xf3P\xa0\xb2\xc5\xd2\xb7\x1dz~\x95'
I store my key and IV so they are always the same, yet the decryption doesn't seem to work. My only thought is that perhaps my data is changed somehow when it is stored in a CSV or a pandas table. Does anyone know what the issue could be, or whether the bytes can be altered when stored in or imported into a DataFrame?
Also, maybe I am extracting the data from my CSV into pandas incorrectly?
# Looks up the entered ID in a CSV downloaded from Dropbox and tries to
# AES-decrypt that row's Fname / Sname / Email values.
# NOTE(review): the original indentation was lost, so which statements belong
# to login(), decoder(), the try/except/finally and the with-blocks cannot be
# determined from this text alone.
# Names used but not defined in the code shown: L_ID_entry, IV3, client, io, pd.
def login():
import sqlite3
import os.path
# Fetches the stored key for the entered ID, then decrypts df1 / df2 / df3.
def decoder():
from Crypto.Cipher import AES
# hashlib and token_bytes are imported but not used in the code shown.
import hashlib
from secrets import token_bytes
# Parameterised lookup of the hex-encoded key for the ID typed by the user.
cursor.execute(
'''
Select enc_key FROM Login where ID = (?);
''',
(L_ID_entry.get(), ))
row = cursor.fetchone()
if row is not None:
keys = row[0]
#design padding function for encryption
# Appends the ASCII byte b"0" until the length is a multiple of 16.
def padded_text(data_in):
while len(data_in)% 16 != 0:
data_in = data_in + b"0"
return data_in
#calling stored key from main file and reverting back to bytes
key_original = bytes.fromhex(keys)
mode = AES.MODE_CBC
#model
cipher = AES.new(key_original, mode, IV3)
#padding data
# NOTE(review): padding is applied here to the data that is about to be
# decrypted, i.e. extra b"0" bytes are added to the ciphertext itself.
# NOTE(review): df1/df2/df3 come from Series.to_numpy(); if those arrays have
# object dtype, .tobytes() returns the array's raw memory rather than the text
# stored in the CSV cell -- confirm what bytes actually reach decrypt().
p4 = padded_text(df1.tobytes())
p5 = padded_text(df2.tobytes())
p6 = padded_text(df3.tobytes())
#decrypting data
# NOTE(review): the same cipher object is used for all three decrypt() calls;
# presumably CBC state carries over between calls, so confirm whether each
# value was encrypted with its own fresh cipher.
d_fname = cipher.decrypt(p4)
d_sname = cipher.decrypt(p5)
d_email = cipher.decrypt(p6)
print(d_fname)
print(d_sname)
print(d_email)
#connecting to db
try:
conn = sqlite3.connect('login_details.db')
cursor = conn.cursor()
print("Connected to SQLite")
except sqlite3.Error as error:
print("Failure, error: ", error)
finally:
#downloading txt from dropbox and converting to dataframe to operate on
import New_user
import ast
_, res = client.files_download("/user_details/enc_logins.csv")
with io.BytesIO(res.content) as csvfile:
# This open() result is not bound to a name; read_csv below reads from the
# in-memory `csvfile` buffer, not from this local file.
with open("enc_logins.csv", 'rb'):
# NOTE(review): encoding='unicode_escape' decodes the cells as text with
# escape processing; verify the stored ciphertext survives that round trip.
df = pd.read_csv(csvfile, names=['ID', 'Fname', 'Sname', 'Email'], encoding= 'unicode_escape')
# Keep only the rows in which any column equals the entered ID.
newdf = df[(df == L_ID_entry.get()).any(axis=1)]
print(newdf)
df1 = newdf['Fname'].to_numpy()
df2 = newdf['Sname'].to_numpy()
df3 = newdf['Email'].to_numpy()
print(df1)
print(df2)
print(df3)
# Explicit close in addition to the `with io.BytesIO(...)` context manager.
csvfile.close()
decoder()

Import data from Python (problem with WHERE condition)

I work in Python
I have code that imports a dataset, and it works fine. However, my dataset contains 3 different patients and I would like to import only the patient that interests me (which should be possible by adding a WHERE clause to the SQL query).
So the following code works:
def importecdata():
    """Load the TECDATA measurements recorded for ``patient14``.

    Selects ``Vol_Recalage_US_VD``, ``Vol_Recalage_Us_VG`` and ``SUBJID`` from
    TECDATA, inner-joined to MEDDATA on ``DateTime``, restricted to rows whose
    ``SUBJID`` is ``'patient14'``. The query runs on the module-level
    ``sql_conn`` connection.

    Returns:
        A single DataFrame made by concatenating every chunk of the result.
    """
    query2 = "SELECT TECDATA.[Vol_Recalage_US_VD], TECDATA.[Vol_Recalage_Us_VG], TECDATA.[SUBJID] FROM TECDATA INNER JOIN MEDDATA ON TECDATA.DateTime = MEDDATA.DateTime WHERE TECDATA.[SUBJID]='patient14';"
    # With chunksize set, read_sql yields DataFrames of up to 100000 rows
    # each; concat stitches them back into one frame.
    dftec1 = pd.read_sql(query2, sql_conn, chunksize=100000)
    dftec = pd.concat(dftec1)
    return(dftec)
It returns the data for patient 14.
But now I want to put the patient's name as a variable in my function so I made the following code:
# Intended to load the TECDATA rows for the patient passed in.
def importecdata(patient):
# NOTE: the word "patient" at the end of this literal is just text inside the
# SQL string, not the Python parameter. The database therefore receives the
# bare identifier `patient`, which matches the "invalid column name ...
# 'patient'" error reported for this version.
query2 = "SELECT TECDATA.[Vol_Recalage_US_VD], TECDATA.[Vol_Recalage_Us_VG], TECDATA.[SUBJID] FROM TECDATA INNER JOIN MEDDATA ON TECDATA.DateTime = MEDDATA.DateTime WHERE TECDATA.[SUBJID]=patient;"
# With chunksize set, read_sql yields DataFrames chunk by chunk; concat joins them.
dftec1 = pd.read_sql(query2, sql_conn, chunksize=100000)
dftec = pd.concat(dftec1)
return(dftec)
I checked, and the patient variable holds the value patient14, but it doesn't work. I also tried changing the value of the variable patient to 'patient14'; that doesn't work either, and I get the same error:
invalid column name: 'patient'. So the code itself works; the problem comes from the WHERE condition with the patient variable.
(Sorry for my English, I'm French.)
You have to pass your patient value into the query; see the code below:
def importecdata(patient):
    """Load the TECDATA measurements recorded for one patient.

    Selects ``Vol_Recalage_US_VD``, ``Vol_Recalage_Us_VG`` and ``SUBJID`` from
    TECDATA, inner-joined to MEDDATA on ``DateTime``, restricted to rows whose
    ``SUBJID`` equals *patient*. The query runs on the module-level
    ``sql_conn`` connection.

    Args:
        patient: SUBJID value to filter on, e.g. ``"patient14"``.

    Returns:
        A single DataFrame made by concatenating every chunk of the result.
    """
    # Bind the value as a query parameter instead of formatting it into the
    # SQL text: the driver handles quoting, so values containing quotes work
    # and cannot alter the statement.
    # NOTE(review): "?" is the qmark placeholder style (sqlite3, pyodbc);
    # confirm it matches the driver behind sql_conn.
    query2 = "SELECT TECDATA.[Vol_Recalage_US_VD], TECDATA.[Vol_Recalage_Us_VG], TECDATA.[SUBJID] FROM TECDATA INNER JOIN MEDDATA ON TECDATA.DateTime = MEDDATA.DateTime WHERE TECDATA.[SUBJID]=?;"
    # With chunksize set, read_sql yields DataFrames of up to 100000 rows
    # each; concat stitches them back into one frame.
    dftec1 = pd.read_sql(query2, sql_conn, params=(patient,), chunksize=100000)
    dftec = pd.concat(dftec1)
    return dftec

How do I send output to a printer using TKinter?

I have the following code in a TKinter (8.6) program. When I invoke the Run_Report function the data is written as expected to the Editor. I would however also like it to print it to my printer. I have put in code which I think should do the job but it returns an error:
lpr.stdin.write("Population Data"+"n".encode())
TypeError: can only concatenate str (not "bytes") to str
The code for the functions is as follows:
# Queries MyInv for the selected investment, prints the rows as a PrettyTable,
# writes them to export.txt and tries to send a heading to the printer via lpr.
# Names used but not defined in the code shown: e_SelectInv, sqlite3, PrettyTable.
def Run_Report():
import subprocess
# Start lpr with a pipe on stdin so data can be written to it.
lpr = subprocess.Popen("/usr/bin/lpr", stdin=subprocess.PIPE)
sPrinciple=e_SelectInv.get()
# NOTE(review): the two literals below are joined with no space between them,
# producing "... WHERE Principle = ?order by Id Asc".
Sql=("SELECT Principle as Investment, RptDate as Report_Date, printf('%,.2f',RptVal) as Report_Value, printf('%.2f',ExchRate) as Exchange_Rate, printf('%,d',RptVal*ExchRate) as Total_Value FROM MyInv WHERE Principle = ?" +
"order by Id Asc")
conn = sqlite3.connect(r'/home/bushbug/Databases/TrackMyInv')
curs = conn.cursor()
# The selected investment is bound as a query parameter.
curs.execute(Sql,(sPrinciple,))
# Column headings come from the cursor description of the executed query.
col_names = [cn[0] for cn in curs.description]
rows = curs.fetchall()
x = PrettyTable(col_names)
# First column left-aligned, the other four right-aligned.
x.align[col_names[0]] = "l"
x.align[col_names[1]] = "r"
x.align[col_names[2]] = "r"
x.align[col_names[3]] = "r"
x.align[col_names[4]] = "r"
x.padding_width = 1
for row in rows:
x.add_row(row)
print (x)
tabstring = x.get_string()
output=open("export.txt","w")
# NOTE(review): "n" is the letter n; presumably "\n" (newline) was intended -- confirm.
output.write("Population Data"+"n")
output.write(tabstring)
# NOTE(review): .encode() binds only to "n", so this evaluates str + bytes and
# raises the TypeError quoted in the question; the whole string would need to
# be encoded, e.g. ("Population Data" + "\n").encode().
# NOTE(review): tabstring is never written to lpr.stdin, and lpr.stdin is not
# closed in the code shown.
lpr.stdin.write("Population Data"+"n".encode())
output.close()
conn.close()
Can someone please help me correct whatever is wrong in the above?
Thank you in anticipation.

Unable to insert calculated field in PIVOT TABLE created using win32COM python library

I am trying to insert a calculated field into a pivot table created using the win32com Python library. But when I execute my code, Excel gives me the error "References, names and arrays are not supported in Pivot Table formulas".
# Builds a pivot table on a new sheet 'PivotSheet' from the data on sheet
# 'PR Jan20' through Excel COM automation, then tries to add a calculated field.
import win32com.client
Excel = win32com.client.gencache.EnsureDispatch('Excel.Application')
win32c = win32com.client.constants
Wb = Excel.Workbooks.Open('MyWorkbook')
Excel.Visible = True
Ws = Wb.Sheets('PR Jan20')
# Add a sheet, name it, and fetch it again by that name as the pivot destination.
Wb.Sheets.Add()
Wb.ActiveSheet.Name = 'PivotSheet'
WsP = Wb.Sheets('PivotSheet')
# Source range: from cell (1, 1) to (row count, column count) of the used range.
MaxR = Ws.UsedRange.Rows.Count
MaxC = Ws.UsedRange.Columns.Count
C1 = Ws.Cells(1,1)
C2 = Ws.Cells(MaxR, MaxC)
PivotSourceRange = Ws.Range(C1,C2)
PCache = Wb.PivotCaches().Create(SourceType=win32c.xlDatabase, SourceData=PivotSourceRange,Version=win32c.xlPivotTableVersion14)
PTable = PCache.CreatePivotTable(TableDestination=WsP.Range('B2'), TableName='RegisterPivot', DefaultVersion=win32c.xlPivotTableVersion14)
# 'Party' is the row field; 'Gross Kgs' and 'Amount (RS.)' are added as data fields.
PTable.PivotFields('Party').Orientation = win32c.xlRowField
PTable.PivotFields('Party').Position = 1
PTable.AddDataField(PTable.PivotFields('Gross Kgs'))
PTable.AddDataField(PTable.PivotFields('Amount (RS.)'))
#till above this line code is working fine
#this below line is causing issue
# NOTE(review): the formula refers to the fields "Amount (RS.)" and "Gross Kgs"
# by their raw names, which contain spaces and parentheses; this is the line
# reported to trigger the Excel error.
PTable.CalculatedFields().Add('Average Purchase Rate', '= Amount (RS.) / Gross Kgs')
# Bare string literal: evaluated and discarded, so it has no effect at run time.
'Excel Error'
I have managed to resolve the above issue. The problem was with the column name "Amount (RS.)": I renamed the column to "Amount" and everything worked fine. I think VBA is not comfortable with parentheses () in a pivot field name.

Categories

Resources