read encrypted excel with pandas - python

I am scanning data in encrypted Excel files with Python, and I would like to read the file contents without opening Excel. Here is my code. I usually use pandas to read files, but pandas.read_excel does not allow a password to be supplied.
from xlrd import *
import win32com.client
import csv
import sys
# Obtain an Excel.Application COM object through pywin32.
xlApp = win32com.client.Dispatch("Excel.Application")
# Open the protected workbook, passing the password via Open's Password keyword.
# path1 and file_name are not defined in this snippet — presumably set elsewhere
# by the asker; TODO confirm.
xlwb = xlApp.Workbooks.Open(path1+file_name, Password='password')
Thank you

Check whether the lines below help, and upvote if they do.
from xlrd import *
import win32com.client
import csv
import sys
import os
import pandas as pd
from tempfile import NamedTemporaryFile

# Read a password-protected workbook by letting Excel itself open it,
# exporting the first sheet to a temporary CSV, and loading that CSV
# with pandas.
filename, password = r'fullpath', 'password'

# Reserve a unique temporary .csv path, then delete the placeholder file so
# that SaveAs below writes to a path that does not exist yet.
f = NamedTemporaryFile(delete=False, suffix='.csv')
f.close()
os.unlink(f.name)

xlCSVWindows = 0x17  # CSV file format, from enum XlFileFormat

xlApp = win32com.client.Dispatch("Excel.Application")
try:
    # Positional arguments: Filename, UpdateLinks, ReadOnly, Format, Password.
    xlwb = xlApp.Workbooks.Open(filename, False, True, None, password)
    try:
        xlws = xlwb.Sheets(1)  # index is from 1
        print(xlws.Name)
        print(xlws.Cells(1, 1))  # if you need cell values
        xlwb.SaveAs(Filename=f.name, FileFormat=xlCSVWindows)  # Save as CSV
    finally:
        # Close without saving so Excel lets go of the exported file before
        # pandas reads it.
        xlwb.Close(SaveChanges=False)
finally:
    # Always quit, so the Excel instance started above does not stay running
    # when any step fails.
    xlApp.Quit()

try:
    df = pd.read_csv(f.name)
finally:
    # The temporary CSV is only an intermediate; remove it once loaded.
    os.unlink(f.name)
print(df.head())
df.to_csv('myoutput.csv', index=False)

Related

I am trying to retrieve the contents of an Excel file using Python, but I am getting an "Investment undefined" error.

import xlrd
import os
class datafromexcel:
    """Load the rows of ``Investment.xlsx`` from the current working directory."""

    def __init__(self) -> None:
        """Read the first sheet and build one ``Investment`` per row.

        The objects are kept on ``self.investments``.

        NOTE: ``Investment`` is neither defined nor imported in this snippet,
        which is the "Investment undefined" error being asked about. It has
        to be defined or imported before this class can be instantiated.
        """
        # Use os.path.join: plain string concatenation of os.getcwd() and the
        # file name omits the path separator and yields a wrong path.
        loc = os.path.join(os.getcwd(), "Investment.xlsx")
        wb = xlrd.open_workbook(loc)
        sheet = wb.sheet_by_index(0)
        # Named ``investments`` so the built-in ``list`` is not shadowed.
        self.investments = []
        for i in range(sheet.nrows):
            self.investments.append(
                Investment(sheet.cell_value(i, 0), sheet.cell_value(i, 1))
            )
        print("Successfully retrieved all excel data")
I recommend using the pandas library to read your Excel file:
import pandas as pd
# Keep the returned DataFrame so it can be used afterwards;
# index_col=0 takes the first column as the row index.
df = pd.read_excel('EXCEL_FILE.xlsx', index_col=0)
For more details, see: https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html

Trying to convert excel sheets to PDF using Python but throwing up this error

Hi, I am trying to convert an Excel sheet to PDF using Python. I adapted a script I wrote to do the same with Word documents, which works fine, but the Excel version raises the error below:
Traceback (most recent call last):
File "C:/Users/alank/Python training/Exceltopdf2.py", line 13, in <module>
xlxs.SaveAs(out_file, FileFormat=xlxsFormatPDF)
OSError: exception: access violation reading 0xFFFFFFFFFFFFFFFF
Any help is appreciated; the script is below.
import sys
import os
import comtypes.client
# File-format code handed to SaveAs below.
# NOTE(review): 17 looks carried over from the Word version of this script
# mentioned above — confirm it is a valid Excel SaveAs format; the answers
# below use ExportAsFixedFormat instead.
xlxsFormatPDF = 17
# Absolute paths of the source workbook and the PDF to produce.
in_file = (r'C:\Users\alank\Python training\Helloworld.xlsx')
out_file = (r'C:\Users\alank\Python training\Helloworld.pdf')
# Drive Excel through COM using comtypes.
excel = comtypes.client.CreateObject('Excel.Application')
xlxs = excel.workbooks.Open(in_file)
# This is the call reported in the traceback above (access violation).
xlxs.SaveAs(out_file, FileFormat=xlxsFormatPDF)
# Not reached when SaveAs raises, since there is no try/finally here.
xlxs.Close()
excel.Quit()
You can try it with win32com.client,
like this:
import win32com.client
from pywintypes import com_error
WB_PATH = r'C:\Users\alank\Python training\Helloworld.xlsx'
PATH_TO_PDF = r'C:\Users\alank\Python training\Helloworld.pdf'

# Create the Excel COM object first; everything below depends on it.
excel = win32com.client.Dispatch("Excel.Application")
excel.Visible = False
wb = None  # stays None if Open fails, so cleanup knows there is nothing to close
try:
    # Open
    wb = excel.Workbooks.Open(WB_PATH)
    # Select every worksheet in the workbook (indexes are 1-based).
    # To export only some sheets, use an explicit list instead, e.g. [1, 2, 3].
    ws_index_list = list(range(1, wb.Worksheets.Count + 1))
    wb.Worksheets(ws_index_list).Select()
    # Save: the first argument 0 selects PDF output for the selected sheets.
    wb.ActiveSheet.ExportAsFixedFormat(0, PATH_TO_PDF)
except com_error:
    print('The convertion failed.')
else:
    print('Succeessful convertion')
finally:
    # Close only a workbook that was actually opened, and always quit Excel.
    if wb is not None:
        wb.Close(SaveChanges=False)
    excel.Quit()
Or you can do it like this (Andreas's solution):
import os
import comtypes.client
SOURCE_DIR = r'C:\Users\alank\Python training'
TARGET_DIR = r'C:\Users\alank\Python training'

# Build absolute input/output paths before touching Excel.
infile = os.path.join(os.path.abspath(SOURCE_DIR), 'Helloworld.xlsx')
outfile = os.path.join(os.path.abspath(TARGET_DIR), 'Helloworld.pdf')

app = comtypes.client.CreateObject('Excel.Application')
app.Visible = False
try:
    doc = app.Workbooks.Open(infile)
    try:
        # Same arguments as the original answer: type 0 (PDF), target path,
        # then 1 and 0 — presumably Quality and IncludeDocProperties; see the
        # Excel ExportAsFixedFormat reference.
        doc.ExportAsFixedFormat(0, outfile, 1, 0)
    finally:
        doc.Close()
finally:
    # Quit even when opening or exporting fails, so Excel is not left running.
    app.Quit()

to_csv pandas python does not show tabular format in excel

I want to export my data as a CSV file, but when I open the CSV file in Excel it is not shown in columns; each row appears as a single string with all the fields concatenated together.
What is the problem? The picture shows my goal (tabular format).
This is my code.
First I export the result from my database:
import cx_Oracle
# SQL to run: every row and column of table "test".
query = """select * from test"""
# conn_str is not defined in this snippet — presumably the asker's Oracle
# connect string; TODO confirm.
db = cx_Oracle.connect(conn_str,encoding="UTF-8")
curs = db.cursor()
curs.execute(query)
# fetchall() pulls the whole result set; `result` feeds the DataFrame below.
result = curs.fetchall()
# NOTE(review): neither the cursor nor the connection is closed in this snippet.
Then I export the file to the local disk and upload it via FTP to a destination folder on another server:
import pandas as pd
from datetime import datetime,timedelta
import ftplib
# One constant for the local export path. The original wrote the file to
# C:\test\ but re-opened it from C:\tets\ (typo), so the upload step could
# not find the file that had just been written.
local_csv = 'C:\\test\\test.csv'

df = pd.DataFrame(result)
df = df.rename_axis(None)
# NOTE: sep=',' is kept as posted; the asker reports below that Excel only
# showed separate columns after this was changed to sep='\t'.
df.to_csv(local_csv, index=False, header=False,
          encoding='utf-16', sep=',')

# Despite the name, this is yesterday's date (now minus one day),
# split into ['YYYY', 'MM', 'DD'].
today = str(datetime.now() - timedelta(1))[:10]
today = today.split('-')
final_today = today[1] + today[2]  # MMDD
outputName = 'test%s.csv' % final_today

Output_Directory = '/test'
# Context managers close the local file and end the FTP session even when
# the upload raises.
with ftplib.FTP('1.1.1.1', 'test', 'test') as session:
    session.cwd(Output_Directory)
    with open(local_csv, 'rb') as fh:
        session.storbinary('STOR ' + outputName, fh)  # send the file
The problem was solved by using sep = '\t' in the to_csv function.

Pandas and Tkinter , opening and saving files

I am trying to save the data as an .xlsx file and then import that new file back into Python using pandas and numpy for statistical analysis.
I want the user to be able to choose where to save the file with 'asksaveasfile', and to get another dialog box ('askopenfile') when opening the file for pandas and numpy.
Also, could someone show how to convert specific columns from bytes to str? See below:
TypeError: write() argument must be str, not bytes.
Here's the end of the code:
import csv
import pandas as pd
import os
import tkinter as tk
from tkinter import filedialog
# Ask the user where to save; the dialog's return value is bound to fn.
# NOTE(review): asksaveasfile appears to return an open file object rather
# than a path — confirm, because fn is later passed to calls that look
# path-based (Workbook, read_excel, to_excel).
fn = filedialog.asksaveasfile(mode='w', defaultextension=".xlsx")
# pdf_to_csv is not defined in this snippet; its result is treated as text
# and split into lines below.
result = pdf_to_csv(fn)
lines = result.split('\n')
import openpyxl as pxl
# NOTE(review): fn is passed as Workbook's first positional argument —
# confirm what that argument means in openpyxl.
wb = pxl.Workbook(fn)
ws = wb.active
# One worksheet row per text line, split on ';'.
# (The loop body's indentation was lost in this paste.)
for line in lines:
ws.append(line.split(';'))
# appending a list gives a complete row in xlsx
# NOTE(review): wb.save(...) is never called in this snippet, although the
# message below reports success.
print("Successfully Saved! ")
# The Tk root is created and hidden only here, after the dialog above.
root = tk.Tk()
root.withdraw()
dataFile=pd.read_excel(fn)#,usecols=['Last Name','First Name','Assignment Title','Department Code','Calendar Year', 'Compensation'])
dataFile.to_excel(fn)
print(fn)
# df, DataFrame and df1 are all aliases of fn (the dialog result), not of
# dataFile.
df = fn
DataFrame = df
df1 = DataFrame
# NOTE(review): the column assignment and drop below therefore act on fn,
# not on the DataFrame read above — presumably dataFile was intended.
df1.columns = ['Last Name','First Name','Assignment Title','Department Code','Calendar Year', 'Compensation']
df1.drop(df1.index[0], inplace=True)
print(df1.head(11))

Data validation using openpyxl isn't writing to file - code enclosed

The code to actually write each file runs great. The problem I'm having is that the data validation piece doesn't appear to be doing anything. No drop downs are being created in the range I'm referencing.
Thanks in advance for any and all assistance!
%%time
import pandas as pd
import xlsxwriter as ew
import csv as csv
import os
import glob
import openpyxl
# Writes one Excel file per user ID and tries to add a Yes/No list validation.
# Every `#filename` below is a placeholder the asker substituted for a real
# path; as posted those lines are not valid Python.
# (Loop bodies lost their indentation in this paste.)
#remove existing files from directory
files = glob.glob(#filename)
for f in files:
os.remove(f)
pendpath = #filename
df = pd.read_sas(pendpath)
# Computed but not used again in this snippet.
allusers = df.UserID_NB.unique()
listuserpath = #filename
listusers = pd.read_csv(listuserpath)
# Normalise the IDs to stripped strings.
listusers = listusers['USER_ID'].apply(lambda x: str(x).strip())
# NOTE(review): `id` shadows the Python built-in of the same name.
for id in listusers:
# Rows of df that belong to this user.
x = df.loc[df['UserID_NB']==id]
path = #filename
x.to_excel(path, sheet_name = str(id), index = False)
# Re-open the file just written so a validation can be added with openpyxl.
from openpyxl import load_workbook
wb = openpyxl.load_workbook(filename = path)
sheet = wb.get_sheet_by_name(str(id))
maxrow = sheet.max_row
from openpyxl.worksheet.datavalidation import DataValidation
# NOTE(review): confirm the meaning of showDropDown=True against the openpyxl
# docs — it may hide the in-cell arrow rather than show it.
dv = DataValidation(type="list", formula1='"Yes,No"', allow_blank=False, showDropDown = True)
# Validation is meant to cover columns R..T down to the last used row.
rangevar = 'R1:T'+ str(maxrow)
dv.ranges.append(rangevar)
# dv is never attached to `sheet` before saving — this is what the answer
# below points out.
wb.save(path)
# Python 2 print statement.
print str(id), rangevar
Code for Basic Sheet
import openpyxl
# Minimal reproduction: a new workbook with one list validation on A1.
wb = openpyxl.Workbook()
ws = wb.active
# Rename the active sheet. The original assigned to `sheet.title`, but this
# snippet defines no `sheet`; the worksheet created here is `ws`.
ws.title = 'testsheet'
path = '#filepath'
from openpyxl.worksheet.datavalidation import DataValidation
dv = DataValidation(type="list", formula1='"Yes,No"', allow_blank=False, showDropDown = True)
dv.ranges.append('A1')
# NOTE: dv is deliberately still not attached to ws here — that is the
# problem being asked about; the answer below adds the missing call.
wb.save(path)
You forgot to add the dv to the worksheet.
>>> # Add the data-validation object to the worksheet
>>> ws.add_data_validation(dv)
Read the docs about validation

Categories

Resources