Write python list of files names to excel using openpyxl - python

I have been trying to get the name of files in a folder on my computer and open an excel worksheet and write the file names in a specific column. However, it returns to me the following message of error. "TypeError: Value must be a list, tuple, range or generator, or a dict. Supplied value is <class 'str'>".
The code is:
from openpyxl import load_workbook
import os
import glob, os

# Work from the folder whose file names should be written to the sheet.
os.chdir("/content/drive/MyDrive/picture")

# Collect each file name (extension stripped) 15 times.
ox = []
for file in glob.glob("*.*"):
    for j in range(0, 15):
        replaced_text = file.replace('.JPG', '')
        ox.append(replaced_text)

oxx = ['K', ox]  # K is a column
file1 = load_workbook(filename='/content/drive/MyDrive/Default.xlsx')
sheet1 = file1['Enter Data Draft']
# NOTE(review): this is the failing loop — the first item is the plain
# string 'K', which append() rejects (the TypeError quoted above).
for item in oxx:
    sheet1.append(item)

I've taken a slightly different approach but looking at your code the problem is with the looping.
The problem.
for item in oxx: sheet1.append(item)
When looping over the items in oxx, there are two items: 'K' and then a list with filenames (x15 each) in it. Openpyxl was expecting a different data structure for append. It's actually expecting a tuple of tuples (or an iterable of values) — documentation here.
The solution
So not knowing what other data you might have on the worksheet I've changed the approach to hopefully satisfy the expected outcome.
I got the following to work as expected.
from openpyxl import load_workbook
import os
import glob, os

os.chdir("/content/drive/MyDrive/picture")

ox = []
for file in glob.glob("*.*"):
    # I've kept this in here assuming you wanted to list the file name 15 times?
    for j in range(0, 15):
        replaced_text = file.replace('.JPG', '')
        ox.append(replaced_text)

file_dir = '/content/drive/MyDrive/Default.xlsx'
file1 = load_workbook(filename=file_dir)
sheet1 = file1['Enter Data Draft']

# If you were appending to the bottom of a list that was already there use this
# last_row = len(sheet1['K'])
# else use this
last_row = 1  # Excel starts at 1, adjust if you had a header in that column

for counter, item in enumerate(ox):
    # K is the 11th column.
    sheet1.cell(row=(last_row + counter), column=11).value = item

# Need to save the file or changes wont be reflected
file1.save(file_dir)

Related

Attempting to read_csv over a list generates a b literal I can't get rid of

I have a list of countries with corresponding .csv files. When I attempt to read_csv iterated over the list with a for loop, I get an error.
I tried generating an empty dict first and making a dict of dataframes, I tried using decode, I tried using item = r'{}.csv'.format(file) instead of just item = '{}.csv'.format(file).
import pandas as pd
import string as str

# Read the list of country names from files.txt.
# NOTE(review): mixing `for line in f` with f.readline() skips every
# other line — part of why the expected files are not found.
fileslist = []
with open('data/files.txt') as f:
    for line in f:
        fileslist.append(f.readline().strip())

for file in fileslist:
    item = '{}.csv'.format(file)
    print(item)
    item = pd.read_csv(item)
This should give me a number of dataframes starting with a dataframe named algeria. Instead I get the error "FileNotFoundError: File b'algeria.csv' does not exist".
This code may help you
import os
import pandas as pd

# Every stripped line of files.txt names one CSV (without extension).
fileslist = []
with open("data/files.txt", mode='r', encoding="utf-8") as fp:
    for line in fp.readlines():
        fileslist.append(line.strip())

for file in fileslist:
    # make sure your files are in same directory
    # if they are in data folder then don't forget to add 'data/{}.csv'.format(file)
    item = '{}.csv'.format(file)
    if os.path.isfile(item):
        item = pd.read_csv(item)

'float' object is not iterable TypeError

I've written a script that takes a large excel spreadsheet of data and strips away unwanted columns, rows that contain zero values in particular columns and then saves out to a csv. The piece that I'm stuck on is I'm also trying to remove rows that are missing cells. The way I was trying this was by way of:
# NOTE(review): each_row is a single cell value here (often a float),
# so map(len, each_row) raises the TypeError discussed below.
for each_row in row_list:
    if not all(map(len, each_row)):
        continue
    else:
        UICData.append(row_list)
But this isn't working correctly as I'm getting the error:
File
"/Users/kenmarold/PycharmProjects/sweetCrude/Work/sweetCrude.py",
line
56, in PrepareRawData
if not all(map(len, each_row)) :
TypeError: 'float' object is not iterable
I'm not exactly sure how to resolve this, what's the way forward on this? I've also attached the full script below.
#!/usr/bin/env python3
import os
import sqlite3
import csv
import unicodecsv
from datetime import date
from xlrd import open_workbook, xldate_as_tuple
from xlwt import Workbook

orig_xls = 'data/all_uic_wells_jun_2016.xls'
temp_xls = 'data/temp.xls'
new_csv = 'data/gh_ready_uic_well_data.csv'
temp_csv = 'data/temp.csv'
input_worksheet_index = 0  # XLS Sheet Number
output_workbook = Workbook()
output_worksheet = output_workbook.add_sheet('Sweet Crude')
lat_col_index = 13
long_col_index = 14


#### SELECT AND FORMAT DATA
def PrepareRawData(inputFile, tempXLSFile, tempCSVFile, outputFile):
    """Strip unwanted columns/rows from the raw XLS and emit a CSV.

    NOTE(review): indentation below is reconstructed from the flattened
    paste; the nesting of the validation loop matches the traceback
    quoted in the question (it fires inside this function).
    """
    # 0 = API#          # 7 = Approval Date
    # 1 = Operator      # 13 = Latitude
    # 2 = Operator ID   # 14 = Longitude
    # 3 = Well Type     # 15 = Zone
    keep_columns = [0, 1, 2, 3, 7, 13, 14, 15]

    with open_workbook(inputFile) as rawUICData:
        UICSheet = rawUICData.sheet_by_index(input_worksheet_index)
        UICData = []
        for each_row_index in range(1, UICSheet.nrows - 1, 1):
            row_list = []
            lat_num = UICSheet.cell_value(each_row_index, lat_col_index)    # Get Lat Values
            long_num = UICSheet.cell_value(each_row_index, long_col_index)  # Get Long Values
            if lat_num != 0.0 and long_num != 0.0:  # Find Zero Lat/Long Values
                for each_column_index in keep_columns:
                    cell_value = UICSheet.cell_value(each_row_index, each_column_index)
                    cell_type = UICSheet.cell_type(each_row_index, each_column_index)
                    if cell_type == 3:
                        # Date cell: convert the Excel serial to mm/dd/YYYY.
                        date_cell = xldate_as_tuple(cell_value, rawUICData.datemode)
                        date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y')
                        row_list.append(date_cell)
                    else:
                        row_list.append(cell_value)
            # BUG discussed in the answers: each_row is a single cell value,
            # so len() fails on float cells ("'float' object is not iterable").
            for each_row in row_list:
                if not all(map(len, each_row)):
                    continue
                else:
                    UICData.append(row_list)
            # CreateDB(row_list)  # Send row data to Database

    for each_list_index, output_list in enumerate(UICData):
        for each_element_index, element in enumerate(output_list):
            output_worksheet.write(each_list_index, each_element_index, element)
    output_workbook.save(tempXLSFile)

    #### RUN XLS-CSV CONVERSION
    workbook = open_workbook(tempXLSFile)
    sheet = workbook.sheet_by_index(input_worksheet_index)
    fh = open(outputFile, 'wb')
    csv_out = unicodecsv.writer(fh, encoding='utf-8')
    for each_row_number in range(sheet.nrows):
        csv_out.writerow(sheet.row_values(each_row_number))
    fh.close()

    #### KILL TEMP FILES
    filesToRemove = [tempXLSFile]
    for each_file in filesToRemove:
        os.remove(each_file)

    print("Raw Data Conversion Ready for Grasshopper")


# ---------------------------------------------------
PrepareRawData(orig_xls, temp_xls, temp_csv, new_csv)
# ---------------------------------------------------
# ---------------------------------------------------
This is a dirty patch.
for each_row in row_list:
    # Wrap scalars so map(len, ...) has something iterable to walk.
    if not isinstance(each_row, list):
        each_row = [each_row]
    if not any(map(len, each_row)):
        continue
    UICData.append(row_list)
EDIT: If the any/map/len raises it still, then I would try a different route to check if it's empty.
Also I'm not sure why you are appending the entire row_list and not the current row. I changed it to appending each_row.
Option1
for each_row in row_list:
    # Skip falsy cells (None, '', 0.0) and keep the rest.
    if not each_row:
        continue
    UICData.append(each_row)
Option2
# NOTE(review): the original comprehension was missing its `for` clause
# (`[arow in row_list if arow]`), which is a SyntaxError; fixed below.
keep_data = [arow for arow in row_list if arow]  # Or w/e logic. This will be faster.
UICData.append(keep_data)
Your row_list contains a set of values, for example:
[1.01, 75, 3.56, ...]
When you call for each_row in row_list:, you assign a float value to each_row for every iteration of the loop.
You then try to do this:
if not all(map(len, each_row)):
Python's map function expects a list as the second argument, and tries to iterate over it to apply the function len to each item in the list. You can't iterate a float.
I'm not entirely sure what you are trying to do here, but if you are wanting to check that none of the items in your row_list are None or an empty string, then you could do:
# Keep the row only when no cell is None or an empty string.
row_has_missing = (None in row_list) or ('' in row_list)
if not row_has_missing:
    UICData.append(row_list)
Your overall objective appears to be to copy selected columns from all rows of one sheet of an Excel XLS file to a CSV file. Each output row must contain only valid cells, for some definition of "valid".
As you have seen, using map() is not a good idea; it's only applicable if all the fields are text. You should apply tests depending generally on the datatype and specifically on the individual column.
Once you have validated the items in the row, you are in a position to output the data. You have chosen a path which (1) builds a list of all output rows (2) uses xlwt to write to a temp XLS file (3) uses xlrd to read the temp file and unicodecsv to write a CSV file. Please consider avoiding all that; instead just use unicodecsv.writer.writerow(row_list)

How to convert strings to numbers, when taking data from .csv to .xlsx, using openpyxl

I wrote this code to take info I have stored inside a .csv file and put it into a .xlsx file. When it is in the excel doc all of the information are strings. I can't figure out a way to change them into numbers so I can then use openpyxl in order to create a line graph. Here is the code
import openpyxl
import csv

# Load the whole CSV into a 2D list of strings.
examplefile = open('output.csv')
exampleReader = csv.reader(examplefile)
exampleData = list(exampleReader)

wb = openpyxl.load_workbook('open.xlsx')
ws = wb.get_sheet_by_name('Sheet1')

# Copy 8 rows x 4 columns; openpyxl cells are 1-based, the list is 0-based.
for i in range(1, 9):
    for h in range(1, 5):
        a = i - 1
        b = h - 1
        ws.cell(row=i, column=h).value = exampleData[a][b]

wb.save('practice.xlsx')
and here is the csv file
Date,Temp,Min Temp,Max Temp
2016-07-11,288.69,287.68,288.69
2016-07-12,289.55,288.79,289.55
2016-07-13,294.3,293.79,294.3
2016-07-14,296.35,296.098,296.35
2016-07-15,291.824,291.824,291.824
2016-07-16,293.373,293.373,293.373
2016-07-17,291.808,291.808,291.808
also I know that this is not the most efficient way to get the data from a to b, so if there are any recommendations on how to do that better, please include those.
The CSV reader reads the file into a 2D list, where each row represents a line. The data is read in and stored as strings. So you'd need to explicitly cast the values to the desired type, something like this:
ws.cell(row=i, column=h).value = float(exampleData[a][b])
Look here for further explanation: Python import csv to list

Getting my output into another excel file

import os, sys
from xlrd import open_workbook
from xlutils.copy import copy
from xlwt import easyxf, Style
import time
rb = open_workbook('A1.xls', on_demand=True,formatting_info =True)
rs = rb.sheet_by_index(0)
wb = copy(rb)
ws = wb.get_sheet(0)
start =time.time()
g1 = dict()
for row in range(1,rs.nrows):
for cell in row:
cellContent = str(cell.value)
if cellContent not in g1.keys():
g1[cellContent]=1
else:
g1[cellContent]=g1[cellContent]+1
for cellContent in g1.keys():
print cellContent, g1[cellContent]
ws.write(row,1, cellContent)
wb.save('A2.xls')
When I run this code, I get the error message cell object not iterable
What could have gone wrong?
I am not familiar myself with xlrd or any of the other modules, but doing any work with csv or excel spreadsheets, I use Pandas, specifically this link. It allows you to easily read and make all sorts of modifications, and then write it out very easily as well. If all you wanted was to copy it would be really easy.
The problem you've got is that row is an integer, as it's populated using for row in range(1, rs.nrows): where the range() function returns an integer - In your case what I presume is each row number between 1 and the number of rows in your spreadsheet.
I'm not familiar with how the xlrd, xlutils and xlwt modules work, but I'd imagine you want to do something more like the following:
for row_number in range(1, rs.nrows):
row = rs.row(row_number)
for cell in row:
....
The Sheet.row(rowx) method gives you a sequence of Cell objects that you can iterate in your inner loop.

Dynamically Build Python Lists from Sheets in an Excel Workbook

I am attempting to compress some code I previous wrote in python. I have some drawn out code that loops through a number of lookup tables in an excel workbook. There are about 20 sheets that contain lookup tables in the workbook. I want to loop through the values in each lookup table and add them to their own list. My existing code looks like this:
test1TableList = []
for row in arcpy.SearchCursor(r"Z:\Excel\LOOKUP_TABLES.xlsx\LookupTable1$"):
    test1TableList.append(row.Code)

test2TableList = []
for row in arcpy.SearchCursor(r"Z:\Excel\LOOKUP_TABLES.xlsx\LookupTable1$"):
    test2TableList.append(row.Code)

test3TableList = []
for row in arcpy.SearchCursor(r"Z:\Excel\LOOKUP_TABLES.xlsx\LookupTable1$"):
    test3TableList.append(row.Code)

test4TableList = []
for row in arcpy.SearchCursor(r"Z:\Excel\LOOKUP_TABLES.xlsx\LookupTable1$"):
    test4TableList.append(row.Code)

test5TableList = []
for row in arcpy.SearchCursor(r"Z:\Excel\LOOKUP_TABLES.xlsx\LookupTable1$"):
    test5TableList.append(row.Code)
yadda yadda
I want to compress that code (maybe in a function).
Issues to resolve:
Sheet names are all different. I need to loop through each sheet in the excel workbook in order to a) grab the sheet object and b) use the sheet name as part of the python list variable name
I want each list to remain in memory for use further along the code
I've been trying something like the following, which works, but the Python list variables don't seem to stay in memory:
import arcpy, openpyxl
from openpyxl import load_workbook, Workbook
wb = load_workbook(r"Z:\Excel\LOOKUP_TABLES.xlsx")
for i in wb.worksheets:
filepath = r"Z:\Excel\LOOKUP_TABLES.xlsx" + "\\" + i.title + "$"
varList = []
with arcpy.da.SearchCursor(filepath, '*') as cursor:
for row in cursor:
varList.append(row[0])
# This is the area I am struggling with. I can't seem to find a way to return
# each list into memory. I've tried the following code to dynamically create
# variable names from the name of the sheet so that each list has it's own
# variable. After the code has run, I'd just like to set a print statement
# (i.e. print variablename1) which will return the list contained in the variable
newList = str(i.title) + "List"
newList2 = list(varList)
print newList + " = " + str(newList2)
I've been working on this for a while and I have no doubt that, at this point, I am overthinking my solution, but I'm at a block. Any recommendations are welcome!
Not sure if it is the best for you, but you could use pandas to import your sheets into a dataframes.
from pandas.io.excel import ExcelFile
filename = 'linreg.xlsx'
xl = ExcelFile(filename)
for sheet in xl.sheet_names:
df = xl.parse(sheet)
print df
Instead of creating an ever-growing set of separately named lists, use a dictionary to collect the data per sheet:
import arcpy
from openpyxl import load_workbook
wb = load_workbook(r"Z:\Excel\LOOKUP_TABLES.xlsx")
sheets = {}
for i in wb.worksheets:
filepath = r"Z:\Excel\LOOKUP_TABLES.xlsx" + "\\" + i.title + "$"
with arcpy.da.SearchCursor(filepath, '*') as cursor:
sheets[i.title] = [row[0] for row in cursor]
print sheets

Categories

Resources