replace string with string in excel - python

I want to compare the string in text file with the first colum in excel sheet
if the string in the file match any value in the first column I want to replace this string with the value in second column and save the changes to the text file
import numpy as np
wb = openpyxl.load_workbook('D:\\example.xlsx')
ws = wb.active
v = "D:\\A2.txt"
v = open(v).read()
row = 1
cell_addr = "A" + str(row)
next_cell = "B" + str(row)
while ws[cell_addr].value is not None:
# print(cell_addr, ws[cell_addr].value, type(ws[cell_addr].value))
j=1
for i in v:
if i == ws[cell_addr].value:
v.replace(i, ws[next_cell].value)
else:
print (i)
with open('D:\\A2.txt', 'w') as f:
f.write(i)
row += 1
cell_addr = "A" + str(row)
next_cell = "B" + str(row)

Something like this?
import openpyxl
xlsfile = 'example.xlsx'
txtfile = 'A2.txt'
with open(txtfile, 'r') as f:
v = f.read().split('\n')
new_v = list(v)
wb = openpyxl.load_workbook(xlsfile)
ws = wb.active
for cells in ws.rows:
val = cells[0].value
if val in v:
index = v.index(val)
new_v[index] = cells[1].value
with open(txtfile, 'w') as f:
f.write('\n'.join(new_v))

Related

convert txt file to different sheet in excel

I wrote a code that passed information from a text file to excel.I sorted according to specific parameters I want to take from the file ("X,Y,O,P,E"). I neet to pass this information to sheet number 2. How can i do that ?
import xlsxwriter
def readFile(_path):
f = open(_path, "r")
return f.readlines()
def test():
file = open("Test.txt", 'r')
data = []
line_data = ""
for line in file:
if "_____ X" in line:
line_data = line[24:].replace("\t", "").replace("\n", "").split(":")[0]
elif "Y:" in line:
line_data = line.replace("\t", "").replace("\n", "").split(" ")[1]
elif "O:" in line:
line_data = line.replace("\t", "").replace("\n", "").split(" ")[1]
elif "P:" in line:
line_data = line.replace("\t", "").replace("\n", "").split(" ")[1]
elif "E=" in line:
line_data = line[21:].replace("\t", "").replace("\n", "").split[0]
if len(line_data) > 0:
data.append(line_data)
line_data = ""
writeToxlsx(data)
def writeToxlsx(data):
_output_path = "Book1.xlsx"
workbook = xlsxwriter.Workbook(_output_path)
worksheet = workbook.add_worksheet()
row = 0
for item in data:
worksheet.write(row, 1, item)
row += 1
workbook.close()
test()
Are you trying to write to an existing sheet? If so, you need to use openpyxl, not xlsxwriter. How to write/update data into cells of existing XLSX workbook using xlsxwriter in python
If you are writing to a new sheet, then where you have the line:
worksheet = workbook.add_worksheet()
instead write:
worksheet = workbook.add_worksheet('The Sheetname You Want')
or if you really need it to be the second sheet, you will have to create a dummy sheet for the front page. You can use the example provided here: Python XlsxWriter - Write to many sheets

how to remove entire row that has empty cells in csv file using python

First off, I do not have pandas framework and am unable to install it. I am hoping that I can solve this problem without pandas.
I am trying to clean my data using python framework, by removing rows that contain empty cells.
this is my code:
import csv
input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove =[0,1,9,11,14,15,23,28,29,32,33,37,38,39,41,43,44,45,46,47,48,49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0
with open(input_file, "r") as source: #to run and delete column
reader = csv.reader(source)
with open(output_file, "w") as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count)) # Print rows processed
for col_index in cols_to_remove:
del row[col_index]
writer.writerow(row)
print(row)
I have tried codes from other similar questions asked, however it prints into an empty file.
Assuming that the empty row is in fact one that looks like this
,,,,,,,,,,,,,, (and many more)
you can do the following:
import csv
input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove =[0,1,9,11,14,15,23,28,29,32,33,37,38,39,41,43,44,45,46,47,48,49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0
with open(input_file, "r") as source: #to run and delete column
reader = csv.reader(source)
with open(output_file, "w") as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count)) # Print rows processed
all_empty = False # new
for cell in row: # new
if len(cell) == 0: # new
all_empty = True # new
break# new
if all_empty: # new
continue # new
for col_index in cols_to_remove:
del row[col_index]
writer.writerow(row)
print(row)
try to skip row if any cell empty like this:
if any(cel is None or cel == '' for cel in row):
continue
Here's the code:
import csv
input_file = 'hey.txt'
output_file = 'test1.csv'
cols_to_remove = [0, 1, 9, 11, 14, 15, 23, 28, 29, 32, 33, 37, 38]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0
with open(input_file, "r+") as source: # to run and delete column
reader = csv.reader(source)
with open(output_file, "w+") as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count)) # Print rows processed
if any(cel is None or cel == '' for cel in row):
continue
for col_index in cols_to_remove:
try:
del row[col_index]
except Exception:
pass
writer.writerow(row)
print(row)
files and cols_to_remove were changed, please, use your own.
step of skipping might be moving after row deleting.
Checking length of row as someone suggested might not work as empty rows in csv may contain empty Strings ''. So len(row) wouldn't return 0 because it is a list of empty strings.
To simply delete empty rows in csv try adding the following check
skip_row = True
for item in row:
if item != None and item != '':
skip_row = False
if not skip_row:
# process row
Full code would be
import csv
input_file = 'test.csv'
output_file = 'test1.csv'
cols_to_remove = [0,1,9,11,14,15,23,28,29,32,33,37,38,39,41,43,44,45,46,47,48,49]
cols_to_remove = sorted(cols_to_remove, reverse=True)
row_count = 0
with open(input_file, "r") as source: #to run and delete column
reader = csv.reader(source)
with open(output_file, "w", newline='' ) as result:
writer = csv.writer(result)
for row in reader:
row_count += 1
print('\r{0}'.format(row_count)) # Print rows processed
skip_row = True
for item in row:
if item != None and item != '':
skip_row = False
if not skip_row:
for col_index in cols_to_remove:
del row[col_index]
writer.writerow(row)
print(row)
Here we check if each element of a row is None type or an empty string and decide if we should skip that row or not.
you can use this for delete all the row that have a null value,
data = data.dropna()
and this one for delete column,
data = data.drop(['column'], axis=1)

Write variable output to a specific column in a CSV?

I'm working on a Python script that scrapes data from an Excel doc, then writes the output to a .csv.
I was able to grab the data and get it to write to the .csv, but all of the data goes into the first column.
I need the bar data to go into the 4th and the foo to go into the 5th column, so I tried to use csv.reader to select the row, and this runs without error but doesn't actually write to the .csv file.
Here's my code:
import xlrd
import csv
###Grab the data
def get_row_values(workSheet, row):
to_return = []
num_cells = myWorksheet.ncols - 1
curr_cell = -1
while curr_cell < num_cells:
curr_cell += 1
cell_value = myWorksheet.cell_value(row, curr_cell)
to_return.append(cell_value)
return to_return
file_path = 'map_test.xlsx'
output = []
output_bar = []
output_foo = []
myWorkbook = xlrd.open_workbook(file_path)
myWorksheet = myWorkbook.sheet_by_name('Sheet1')
num_rows = myWorksheet.nrows - 1
curr_row = 0
column_names = get_row_values(myWorksheet, curr_row)
print len(column_names)
while curr_row < num_rows:
curr_row += 1
row = myWorksheet.row(curr_row)
this_row = get_row_values(myWorksheet, curr_row)
x = 0
while x <len(this_row):
if this_row[x] == 'x':
output.append([this_row[0], column_names[x]])
output_bar.append([column_names[x]])
output_foo.append([this_row[0]])
print output
myData = [["number", "name", "version", "bar",
"foo"]]
##### Next section is the code in question, it
####doesn't error out, but won't write to the .csv######
myFile = open("test123.csv", "w")
writer = csv.writer(myFile)
with open('test123.csv', 'r') as csvfile:
reader = csv.reader(csvfile, delimiter=',')
for row in reader:
row[5] = myFile.readline()
writer.writerows(output_foo)
row[4] = myFile.readline()
writer.writerows(outpu_bar)
#####This successfully writes to the csv, but
#####all data to first column#####
# myFile = open('test123.csv', 'w')
# with myFile:
# writer = csv.writer(myFile)
# writer.writerows(myData)
# #writer.writerows(output)
# writer.writerows(output_foo)
# writer.writerows(output_bar)
x += 1
print ("CSV Written")

CSV not working if I put some extra space. Getting list index out of range error

I am new in python and I am trying to getting CSV data using python code.
Every thing is working first time,But when I edit my .csv file then an error occured says:
File "D:/wamp/www/optimizer_new/new_code/optimal_lineup.py", line 310, in get_player_list
if (int(row[4]) == -1):
IndexError: list index out of range
I am just putting a extra space inside my .csv
here is my sample code:
def get_player_list(possible_name):
file_name = ""
if (len(possible_name) > 0):
file_name = possible_name
else:
file_name = 'basketball_data2.csv'
player_list = []
with open(file_name) as csvfile:
reader = csv.reader(csvfile, delimiter=',')
reader.next()
for row in reader:
if (int(row[4]) == -1):
#print("Skipping %s" % (row[0]))
continue
name = row[0]
pos_p = get_possible_positions(row[1])
c = row[2]
v = row[3]
my_p = player(int(c) / 100, float(v), name, pos_p, int(row[4]))
player_list.append(my_p)
'''
name = row['Player Name']
c = row['Salary']
v = row['FP']
pos_p = get_possible_positions(row['Pos'])
player_list.append(player(c, v, name, pos_p))
'''
return player_list
My CSV contain these columns:
Player Name,Pos,Salary,FP,Keep/exclude
Any suggestion?

Search for string in CSV Files using python and write the results

#!/usr/bin/python
import csv
import re
string_1 = ('OneTouch AT')
string_2 = ('LinkRunner AT')
string_3 = ('AirCheck')
#searched = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
print "hello Pythong! "
#def does_match(string):
# stringl = string.lower()
# return any(s in stringl for s in searched)
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
for row in reader:
found = False
for col in row:
if col in [string_1, string_2, string_3] and not found:
writer.writerow(row)
found = True
#for row in reader:
# if any(does_match(col) for col in row):
# writer.writerow(row[:2]) # write only 2 first columns
inFile.close()
outFile.close()
I'm trying to figure out how to search a CSV file for 3 items. If those items exist print the row. Ideally I would like only Columns 1 and 3 to print to a new file.
Sample Data File
LinkRunner AT Video,10,20
Wireless Performance Video OneTouch AT,1,2
Wired OneTouch AT,200,300
LinkRunner AT,200,300
AirCheck,200,300
I'm trying to figure out how to search a CSV file for 3 items. If
those items exist print the row. Ideally I would like only Columns 1
and 3 to print to a new file.
Try this:
import csv
search_for = ['OneTouch AT','LinkRunner AT','AirCheck']
with open('in.csv') as inf, open('out.csv','w') as outf:
reader = csv.reader(inf)
writer = csv.writer(outf, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for row in reader:
if row[0] in search_for:
print('Found: {}'.format(row))
writer.writerow(row)
#!/usr/bin/python
import csv
import numpy as np
class search_csv(object):
def __init__(self, infile, outfile):
infile = open(infile, 'rb')
read_infile = [i for i in csv.reader(infile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)]
self.non_numpy_data = read_infile
self.data = np.array(read_infile, dtype=None)
self.outfile = open(outfile, 'wb')
self.writer_ = csv.writer(self.outfile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL)
def write_to(self, matched_values):
self.writer_.writerows(matched_values)
print ' Matched Values Written '
return True
def searcher(self, items, return_cols=[0,2]): ##// items should be passed as list -> ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
find_these = np.array(items, dtype=None)
matching_y = np.in1d(self.data, find_these).reshape(self.data.shape).nonzero()[0]
matching_data = self.data[matching_y][:,return_cols]
self.write_to(matching_data)
self.outfile.close()
return True
def non_numpy_search(self, items, return_cols=[0,2]):
lst = []
for i in self.non_numpy_data:
for ii in items:
if ii in i:
z = []
for idx in return_cols:
z.append(i[idx])
lst.append(z)
break
self.write_to(lst)
return True
### now use the class ###
SEARCHING_FOR = ['OneTouch AT', 'LinkRunner AT', 'AirCheck']
IN_FILE = 'in_file.csv'
OUT_FILE = 'out_file.csv'
non_numpy_search(IN_FILE, OUT_FILE).non_numpy_search(SEARCHING_FOR)
By the phrasing of your question I'm assuming you just want to complete the task at hand and don't really care how. So copy and paste this in and use your data file as the 'IN_FILE' value and the file name you want to write to as the 'OUT_FILE' value. Place the values you want to search for in the 'SEARCHING_FOR' list as you're done.
Things to note....
SEARCHING_FOR should be a list.
the values in SEARCHING_FOR are matched EXACTLY so 'A' will not match 'a'. If you want a to use a regex or something more complex let me know.
In function 'non_numpy_search' there is a 'return_cols' parameter. It defaults to the first and 3rd column.
If you don't have numpy let me know.
#!/usr/bin/python
import csv
import re
import sys
import gdata.docs.service
#string_1 = ('OneTouch AT')
#string_2 = ('LinkRunner AT')
#string_3 = ('AirCheck')
searched = ['aircheck', 'linkrunner at', 'onetouch at']
def find_group(row):
"""Return the group index of a row
0 if the row contains searched[0]
1 if the row contains searched[1]
etc
-1 if not found
"""
for col in row:
col = col.lower()
for j, s in enumerate(searched):
if s in col:
return j
return -1
def does_match(string):
stringl = string.lower()
return any(s in stringl for s in searched)
#Opens Input file for read and output file to write.
inFile = open('data.csv', "rb")
reader = csv.reader(inFile)
outFile = open('data2.csv', "wb")
writer = csv.writer(outFile, delimiter='\t', quotechar='"', quoting=csv.QUOTE_ALL)
#for row in reader:
# found = False
# for col in row:
# if col in [string_1, string_2, string_3] and not found:
# writer.writerow(row)
# found = True
"""Built a list of items to sort. If row 12 contains 'LinkRunner AT' (group 1),
one stores a triple (1, 12, row)
When the triples are sorted later, all rows in group 0 will come first, then
all rows in group 1, etc.
"""
stored = []
for i, row in enumerate(reader):
g = find_group(row)
if g >= 0:
stored.append((g, i, row))
stored.sort()
for g, i, row in stored:
writer.writerow(tuple(row[k] for k in (0,2))) # output col 1 & 5
#for row in reader:
# if any(does_match(col) for col in row):
# writer.writerow(row[:2]) # write only 2 first columns
# Closing Input and Output files.
inFile.close()
outFile.close()

Categories

Resources