The code to actually write each file runs great. The problem I'm having is that the data validation piece doesn't appear to be doing anything. No drop downs are being created in the range I'm referencing.
Thanks in advance for any and all assistance!
%%time
import pandas as pd
import xlsxwriter as ew
import csv as csv
import os
import glob
import openpyxl
#remove existing files from directory
files = glob.glob(#filename)
for f in files:
os.remove(f)
pendpath = #filename
df = pd.read_sas(pendpath)
allusers = df.UserID_NB.unique()
listuserpath = #filename
listusers = pd.read_csv(listuserpath)
listusers = listusers['USER_ID'].apply(lambda x: str(x).strip())
for id in listusers:
x = df.loc[df['UserID_NB']==id]
path = #filename
x.to_excel(path, sheet_name = str(id), index = False)
from openpyxl import load_workbook
wb = openpyxl.load_workbook(filename = path)
sheet = wb.get_sheet_by_name(str(id))
maxrow = sheet.max_row
from openpyxl.worksheet.datavalidation import DataValidation
dv = DataValidation(type="list", formula1='"Yes,No"', allow_blank=False, showDropDown = True)
rangevar = 'R1:T'+ str(maxrow)
dv.ranges.append(rangevar)
wb.save(path)
print str(id), rangevar
Code for Basic Sheet
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
sheet.title = 'testsheet'
path = '#filepath'
from openpyxl.worksheet.datavalidation import DataValidation
dv = DataValidation(type="list", formula1='"Yes,No"', allow_blank=False, showDropDown = True)
dv.ranges.append('A1')
wb.save(path)
You are missing to add the dv to the worksheet.
>>> # Add the data-validation object to the worksheet
>>> ws.add_data_validation(dv)
Read the docs about validation
Related
import xlrd
import os
class datafromexcel:
def __init__(self) -> None:
rootPath = os.getcwd()
loc = (rootPath+"Investment.xlsx");
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)
list = [];
for i in range(sheet.nrows):
#print(sheet.cell_value(i, 0),sheet.cell_value(i, 1))
list.append(Investment(sheet.cell_value(i, 0),sheet.cell_value(i, 1)));
print("Successfully retrieved all excel data");
I recommend you to use Pandas library and read your excel file with it
import pandas as pd
pd.read_excel('EXCEL_FILE.xlsx', index_col=0)
for more details: https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
I have the next code:
import PyPDF2
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.worksheet.cell_range import CellRange
#Definir Archivo Excel
file = 'C:\\Users\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx'
wb = openpyxl.load_workbook(file)#define workook
ws = wb['Hoja1']#define sheet1
#define max values for columns
max_row_for_a = max((a.row for a in ws['A'] if a.value is not None))
max_row_for_b = max((b.row for b in ws['B'] if b.value is not None))
#LOOP
for row in ws.iter_rows(min_row=2, max_col=1, max_row=max_row_for_a):
pdf= row[0].value
print (pdf)
for row2 in ws.iter_rows(min_row=2, max_col=2, max_row=max_row_for_b, min_col=2):
extracto = row2[0].value
pdfselect=open(pdf,"rb")
leer = PyPDF2.PdfFileReader(pdfselect)
pagina = leer.getPage(0)
extracto = pagina.extractText()
print(extracto)
wb.save("PRUEBA_PDF2.xlsx")
wb.close
The idea is read the excel's column A with the pdf's names and write in the column B the pdf extracts, but when i execute the code don't show anything, even dont show error. I try with a minimal case like this and i dont have problems:
#pdfselect=open("ejemplo.pdf","rb")
#leer = PyPDF2.PdfFileReader(pdfselect)
#pagina = leer.getPage(0)
#sheet = wb.active
#ws['B2'] = pagina.extractText()
#wb.save("PRUEBA_PDF2.xlsx")
#wb.close
what i'm doing wrong?
Greetings!
I found the problem. The correct code it must like this:
import PyPDF2
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.worksheet.cell_range import CellRange
#Definir Archivo Excel
file = 'C:\\Users\\lorrego\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx'
wb = openpyxl.load_workbook(file)#define workook
ws = wb['Hoja1']#define la hoja 1
max_row_for_a = max((a.row for a in ws['A'] if a.value is not None))
for row in ws.iter_rows(min_row=2, max_col=2, max_row=max_row_for_a):
pdf= row[0].value
pdfselected=open(pdf,"rb")
leer = PyPDF2.PdfFileReader(pdfselected)
pagina = leer.getPage(0)
ws.cell(row=row[0].row, column=2).value = pagina.extractText()
wb.save("C:\\Users\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx")
wb.close
have dataframe how to encrypt it with a password
import pandas as pd
# intialise data of lists.
data = {'Name':['Tom', 'nick', 'krish', 'jack'], 'Age':[20, 21, 19, 18]}
# Creating DataFrame
df = pd.DataFrame(data)
are there any pandas options to add a password excel or csv which is created with data frame.
So that a password is required to open that csv or excel with GUI.
Tried this:
df.to_excel('123.xlsx')
from openpyxl import Workbook
from openpyxl import load_workbook
test_spreadsheet = "123.xlsx"
wb = load_workbook(test_spreadsheet)
wb.security.workbookPassword = "password"
from openpyxl import Workbook
from openpyxl import load_workbook
test_spreadsheet = "123.xlsx"
wb = load_workbook(test_spreadsheet)
ws = wb.worksheets[0]
ws.protection
ws.protection.set_password('test')
wb.save('12344.xlsx')
from openpyxl import load_workbook
wb = load_workbook(filename = '123.xlsx')
wb.security.workbookPassword = 'test'
wb.security.lockStructure = True
wb.save('123_password.xlsx')
wb = Workbook('123.xlsx')
ws = wb.worksheets[0]
ws.protect('abc123.xlsx')
But when i open it. the file is opening without any prompt of password . tried in google sheets and libre office
Found out that : Openpyxl's WorkbookProtection only works for preventing modifying sheets that are there.
os: Linux
I have a series of CSV file like this one. I’m trying to convert and merge them into an xlsx file with python and openpyxl with this code:
import csv
import openpyxl
import glob
csvpath = 'C:/Users/Lorenzo/Downloads/CSV/'
csvfiles = glob.glob(csvpath + '*.csv')
data = input('Inserisci data Simulazione: ')
destinationfilepath = 'C:/Users/Lorenzo/Desktop/Simulazione_' + data + '.xlsx'
wb = openpyxl.Workbook()
for i in range(len(csvfiles)):
filename = csvfiles[i]
reader = csv.reader(open(filename), delimiter=',')
csvname = filename[len(csvpath):-4]
ws1 = wb.create_sheet(csvname)
k=0
for row in reader:
if k==0:
ws1.append(row)
else:
g=0
for cell in row:
c= ws1.cell(row=k, column=g)
c.value = float(cell)
g=g+1
k=k+1
ws1['A1'] = 'Iteration'
ws1['B1'] = 'CD'
ws1['C1'] = 'CL'
ws1['D1'] = 'CL_F'
ws1['E1'] = 'CL_R'
ws1['F1'] = 'CM'
sheet = wb['Sheet']
wb.remove(sheet)
wb.save(destinationfilepath)
The code runs but in most cells (and strangely enough not in all cells) I get the error “number stored as text” despite using the command float like suggested in this and similar topics.
What is that I'm doing wrong?
How to find total number of rows using XLWT or XLRD in Python? I have an excel file(accounts.xls) and would like to append rows in it.
I am getting an error here - AttributeError: 'Sheet' object has no attribute 'write'
from xlrd import open_workbook
from xlwt import Workbook
def saveWorkSpace(fields,r):
wb = open_workbook('accounts.xls')
ws = wb.sheet_by_index(0)
r = ws.nrows
r += 1
wb = Workbook()
ws.write(r,0,fields['name'])
ws.write(r,1,fields['phone'])
ws.write(r,2,fields['email'])
wb.save('accounts.xls')
print 'Wrote accounts.xls'
Here is the solution of the above question
import xlrd
import xlwt
from xlutils.copy import copy
def saveWorkSpace(fields):
rb = xlrd.open_workbook('accounts.xls',formatting_info=True)
r_sheet = rb.sheet_by_index(0)
r = r_sheet.nrows
wb = copy(rb)
sheet = wb.get_sheet(0)
sheet.write(r,0,fields['name'])
sheet.write(r,1,fields['phone'])
sheet.write(r,2,fields['email'])
wb.save('accounts.xls')
print 'Wrote accounts.xls'
Python Program to add Values to the last data row an Excel sheet.
from xlwt import Workbook
from xlrd import open_workbook
import openpyxl
# Function to get the last RowCount in the Excel sheet , change the index of the sheet accordingly to get desired sheet.
def getDataColumn():
#define the variables
rowCount=0
columnNumber=0
wb = open_workbook('C:\\Temp\\exp\\data.xlsx')
ws = wb.sheet_by_index(0)
rowCount = ws.nrows
rowCount+=1
columnNumber=1
print(rowCount)
writedata(rowCount,columnNumber)
#Data to specified cells.
def writedata(rowNumber,columnNumber):
book = openpyxl.load_workbook('C:\\Temp\\exp\\data.xlsx')
sheet = book.get_sheet_by_name('Sheet1')
sheet.cell(row=rowNumber, column=columnNumber).value = 'Appended Data'
book.save('C:\\Temp\\exp\\data.xlsx')
print('saved')
getDataColumn()
exit()