Conversion from XLSX to TXT asks for __exit__ method - python

As per other posts I saw on Stack Overflow, I wrote the following code to convert an XLSX file to TXT; however, it throws AttributeError: __exit__:
# Question's attempt, kept as posted: the object returned by
# sheet_by_index(0) is used in a `with` statement and then passed to
# csv.reader as if it were an open text file. This is the code that raises
# the AttributeError reported above.
import xlrd
import csv
with xlrd.open_workbook('data.xlsx').sheet_by_index(0) as in_xslx:
in_reader = csv.reader(in_xslx)
with open("data.txt", "w", newline='', encoding='utf8') as out_text:
out_writer = csv.writer(out_text, delimiter = '\t')
for row in in_reader:
out_writer.writerow(row)
However it successfully converts a CSV if I replace the first two rows with:
with open("data.csv", "r", encoding='utf-8') as in_csv:
in_reader = csv.reader(in_csv)
Any idea why this happens when converting XLSX -> TXT, and how to correct it?
Thank you

What you need is:
import csv

import xlrd

# Read the workbook first so that a failure to open it does not leave an
# empty data.txt behind. The sheet object is used directly: the question's
# AttributeError shows it cannot be used in a `with` statement.
# NOTE(review): recent xlrd releases may only read .xls files - confirm the
# installed version can open .xlsx.
data_file = xlrd.open_workbook('data.xlsx')
# get the first worksheet
worksheet = data_file.sheet_by_index(0)

# newline='' lets the csv module control line endings; the utf8 encoding
# matches the one used in the question's own open() call.
with open("data.txt", "w", newline='', encoding='utf8') as out_text:
    # define output writer (tab-delimited)
    out_writer = csv.writer(out_text, delimiter='\t')
    # get the row values and write them into the output file;
    # range() replaces the Python-2-only xrange()
    for rownum in range(worksheet.nrows):
        out_writer.writerow(worksheet.row_values(rownum))

Related

Unable to lowercase the header of csv file

I'm trying to make the first row/header lowercase, in multiple csv files in a directory using python. The code and error are below. Is there any way to fix the code or some other way?
# Question's attempt, kept as posted: the list of lowercased cells (`data`)
# is handed to os.rename(), which is what the TypeError quoted below
# complains about. Note that `glob` is imported as a module but called like
# a function, and `os` is used without being imported in this snippet.
import csv
import glob
path = (r'C:\Users\Documents')
for fname in glob(path):
with open(fname, newline='') as f:
reader = csv.reader(f)
row1 = next(reader)
for row1 in reader:
data = [row1.lower() for row1 in row1]
os.rename(row1, data)
The error is:
TypeError: rename: src should be string, bytes or os.PathLike, not list
I think you're getting rows and columns mixed-up. Here's some untested code that does what you want, I think:
import csv
from glob import glob

path = (r'C:\Users\Documents\*.csv')  # Note wildcard character added for glob().

for fname in glob(path):
    # Read the whole file into memory first, because the same path is
    # rewritten below.
    with open(fname, newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)  # Get the header row (None if file is empty).
        if header is None:
            # Nothing to lowercase; leave the empty file untouched.
            continue
        header = [column.lower() for column in header]  # Lowercase the headings.
        rows = [header] + list(reader)  # Read the rest of the rows.
    with open(fname, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(rows)  # Write new header & original rows back to file.

how to remove a specific row in csv file based upon the duplicate using python?

I have a csv file which has many rows looks like below.
20170718 014418.475476 [UE:142 CRNTI : 446]
20170718 094937.865362 [UE:142 CRNTI : 546]
Above are the sample two rows of the csv file.
Now if we see the rows there is a string called [UE:142...] which repeats in the csv file.
Problem statement:
I want to remove every row whose [UE:<id> string has already appeared earlier in the csv file. In the rows above, the string [UE:142 appears twice, so the second row must be deleted. The file contains many such strings with different, random ids, like [UE:142.
Can anyone please help me with python script for the above problem statement?
# Question's attempt, kept as posted: the whole file is read as one string,
# split on single spaces, and the distinct pieces are written out by
# iterating over a set (so duplicates are removed per piece, not per row).
import csv
reader = open("test.csv", "r")
lines = reader.read().split(" ")
reader.close()
writer = open("test_1.csv", "w")
for line in set(lines):
writer.write(line)
writer.close()
from csv import reader, writer as csv_writer
csv_path = '<your csv file path here>'
def remove_duplicate_ue(csv_path):
    """Yield rows of a space-delimited file, dropping repeated UE entries.

    Each row is split on single spaces. The first field containing ``'UE:'``
    (for example ``'[UE:142'``) is used as the row's UE key. A row is
    yielded if it has no such field, or if its UE key has not been seen in
    an earlier row; later rows with an already-seen key are skipped.

    Args:
        csv_path: Path of the file to read.

    Yields:
        Each kept row as a list of string fields, in file order.
    """
    seen_ue = set()
    with open(csv_path, 'r', newline='') as csv_file:
        for line in reader(csv_file, delimiter=' '):
            # Search every field: with a space delimiter the UE token is not
            # the last field of the sample rows (that one is e.g. '446]').
            ue_key = next((field for field in line if 'UE:' in field), None)
            if ue_key is None:
                # Rows without a UE token (including empty rows) pass through.
                yield line
            elif ue_key not in seen_ue:
                seen_ue.add(ue_key)
                yield line
def write_csv(csv_path, rows, delimiter=' '):
    """Write ``rows`` to ``csv_path`` with the csv module.

    Args:
        csv_path: Path of the file to create or overwrite.
        rows: Iterable of rows, each an iterable of field values.
        delimiter: Field separator; defaults to a single space.
    """
    # newline='' leaves line endings to the csv writer, as the csv module
    # documentation asks for files passed to csv.writer.
    with open(csv_path, 'w', newline='') as csv_file:
        csv_writer(csv_file, delimiter=delimiter).writerows(rows)
# tuple() consumes the generator completely (finishing the read of csv_path)
# before write_csv reopens that same path in 'w' mode and overwrites it.
write_csv (csv_path, tuple (remove_duplicate_ue (csv_path)))

converting csv file to another csv choosing specific columns python

i am trying to convert a csv file with the following columns:
ID,Name,Postcode,State,Suburb,Lat,Lon
1,Hurstville Store,1493,NSW,Hurstville,-33.975869,151.088939
I want to make a new csv with only the Name, Lat and Lon columns, but I'm getting this error:
header = csvReader.next()
AttributeError: '_csv.reader' object has no attribute 'next'
here is my code so far:
# Question's attempt, kept as posted: the csvReader.next() call below is the
# line that raises the AttributeError quoted above.
import csv
# Set up input and output variables for the script
storeLoc = open("store_locations.csv", "r")
# Set up CSV reader and process the header
csvReader = csv.reader(storeLoc)
header = csvReader.next()
nameIndex = header.index("Name")
latIndex = header.index("Lat")
lonIndex = header.index("Lon")
# Make an empty list
coordList = []
# Loop through the lines in the file and get each coordinate
for row in csvReader:
name = row[nameIndex]
lat = row[latIndex]
lon = row[lonIndex]
coordList.append([name,lat,lon])
# Print the coordinate list
print(coordList)
coordList.append([name,lat,lon])
# The output file is opened here, but nothing in this snippet writes to it.
stores = open('store_coords.csv','w', newline='')
thanks for any feedback
That code will work in Python 2, i.e csv.reader objects have a next() method. However, in Python 3 there is no such method.
Instead, and this works in both versions of Python, use next(reader):
import csv
# Set up input and output variables for the script
storeLoc = open("store_locations.csv", "r")
# Set up CSV reader and process the header
csvReader = csv.reader(storeLoc)
# The builtin next() replaces the csvReader.next() method call.
header = next(csvReader)
Here is a concise way of writing it using the csv module:
import csv
from operator import itemgetter

# Positions 1, 5 and 6 are the Name, Lat and Lon columns of the sample
# header (ID,Name,Postcode,State,Suburb,Lat,Lon).
name_lat_lon = itemgetter(1, 5, 6)

# newline='' on both files lets the csv module handle line endings itself,
# as its documentation recommends.
with open('store_locations.csv', newline='') as infile, \
        open('store_coords.csv', 'w', newline='') as outfile:
    csv.writer(outfile).writerows(name_lat_lon(row) for row in csv.reader(infile))
More concise still:
import csv

# Copy only columns 1, 5 and 6 (Name, Lat, Lon in the sample header) of
# every row, header row included. newline='' on both files lets the csv
# module handle line endings itself, as its documentation recommends.
with open('store_locations.csv', newline='') as infile, \
        open('store_coords.csv', 'w', newline='') as outfile:
    csv.writer(outfile).writerows((row[1], row[5], row[6]) for row in csv.reader(infile))
Or even more so if certain assumptions are made about the CSV delimiter:
# Plain-text variant: split each line on commas and re-join fields 1, 5
# and 6. Each line keeps its trailing newline through split(','), so for
# the 7-column sample the last selected field still ends the output line.
with open('store_locations.csv') as infile, open('store_coords.csv', 'w') as outfile:
    for raw_line in infile:
        fields = raw_line.split(',')
        outfile.write(','.join((fields[1], fields[5], fields[6])))

How to Sort a Column in an Excel sheet

So I understand how sorting works in Python. If I put...
# Example from the question (Python 2 print statement): sort a list of
# strings in place and print it.
a = (["alpha2A", "hotel2A", "bravo2C", "alpha2B", "tango3B", "alpha3A", "zulu.A1", "foxtrot8F", "zulu.B1"]
a.sort()
print a
I will get...
['alpha2A', 'alpha2B', 'alpha3A', 'bravo2C', 'foxtrot8F', 'hotel2A', 'tango3B', 'zulu.A1', 'zulu.B1']
However, I want to sort a column in a Excel sheet so I tried...
# Question's attempt, kept as posted: "case_name.csv"[2] indexes the
# file-name string itself, so the list holds only the character 's'.
# Note that mode "w+" truncates the file when it is opened.
isv = open("case_name.csv", "w+")
a = (["case_name.csv"[2]])
a.sort()
print a
And got a return of...
['s']
I understand that it is returning the 3rd letter in the file name but how do I make it sort and return the entire column of the Excel sheet?
Update: New Code
# Question's updated attempt, kept as posted: read the file with the
# excel_tab dialect, sort the rows by the item at index 2, write them back
# with the excel dialect, then print how many rows have fewer than 3 items.
import csv
import operator
with open('case_name.csv') as infile:
data = list(csv.reader(infile, dialect=csv.excel_tab))
data.sort(key=operator.itemgetter(2))
with open('case_name_sorted.csv', 'w') as outfile:
writer = csv.writer(outfile, dialect='excel')
writer.writerows(data)
print(sum(1 for row in data if len(row) < 3))
And it returns
data = list(csv.reader(infile, dialect=csv.excel_tab))
_csv.Error: new-line character seen in unquoted field - do you need to open the file in universal-newline mode?
import csv
import operator

# read the data from the source file; newline='' passes line endings to the
# csv module untouched, which is how its documentation says files for
# csv.reader should be opened
with open('case_name.csv', newline='') as infile:
    data = list(csv.reader(infile, dialect='excel'))

# sort a list of sublists by the item in index 2
data.sort(key=operator.itemgetter(2))

# time to write the results into a file
with open('case_name_sorted.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile, dialect='excel')
    writer.writerows(data)

Python script to turn input csv columns into output csv row values

I have an input csv that look like
email,trait1,trait2,trait3
foo#gmail,biz,baz,buzz
bar#gmail,bizzy,bazzy,buzzy
foobars#gmail,bizziest,bazziest,buzziest
and I need the output format to look like
Indv,AttrName,AttrValue,Start,End
foo#gmail,"trait1",biz,,,
foo#gmail,"trait2",baz,baz,,
foo#gmail,"trait3",buzz,,,
For each row in my input file I need to write a row for the N-1 columns in the input csv. The Start and End fields in the output file can be empty in some cases.
I'm trying to read in the data using a DictReader. So far I've been able to read in the data with
# Question's attempt, kept as posted: outfile.write() is called with several
# positional arguments for the header and with lists (not strings) for the
# data rows.
import unicodecsv
import os
import codecs
with open('test.csv') as csvfile:
reader = unicodecsv.csv.DictReader(csvfile)
outfile = codecs.open("test-write", "w", "utf-8")
outfile.write("Indv", "ATTR", "Value", "Start","End\n")
for row in reader:
outfile.write([row['email'],"trait1",row['trait1'],'',''])
outfile.write([row['email'],"trait2",row['trait2'],row['trait2'],''])
outfile.write([row['email'],"trait3",row['trait3'],'','')
Which doesn't work (I think I need to convert the list to a string), and it is also very brittle because I'm hard-coding the column names for each row. The bigger issue is that the data within the for loop isn't written to "test-write". Only the line
outfile.write("Indv", "ATTR", "Value", "Start","End\n") actually write out to the file. Is DictReader the appropriate class to use in my case?
This uses a unicodecsv.DictWriter and the zip() function to do what you want, and the code is fairly readable in my opinion.
import unicodecsv
import os
import codecs

with open('test.csv') as infile, \
        codecs.open('test-write.csv', 'w', 'utf-8') as outfile:
    reader = unicodecsv.DictReader(infile)
    fieldnames = 'Indv,AttrName,AttrValue,Start,End'.split(',')
    writer = unicodecsv.DictWriter(outfile, fieldnames)
    writer.writeheader()
    for row in reader:
        # Look up every input column before writing anything for this row.
        email = row['email']
        trait1 = row['trait1']
        trait2 = row['trait2']
        trait3 = row['trait3']
        # One output row per trait; only trait2 also fills the Start column.
        # Keys left out of a dict are filled in by the DictWriter.
        first = {'Indv': email, 'AttrName': 'trait1', 'AttrValue': trait1}
        second = {'Indv': email, 'AttrName': 'trait2', 'AttrValue': trait2,
                  'Start': trait2}
        third = {'Indv': email, 'AttrName': 'trait3', 'AttrValue': trait3}
        writer.writerows([first, second, third])
Here's the contents of the test-write.csv file it produced from your example input csv file:
Indv,AttrName,AttrValue,Start,End
foo#gmail,trait1,biz,,
foo#gmail,trait2,baz,baz,
foo#gmail,trait3,buzz,,
bar#gmail,trait1,bizzy,,
bar#gmail,trait2,bazzy,bazzy,
bar#gmail,trait3,buzzy,,
foobars#gmail,trait1,bizziest,,
foobars#gmail,trait2,bazziest,bazziest,
foobars#gmail,trait3,buzziest,,
I may be completely off since I don't do a lot of work with unicode, but it seems to me that the following should work:
import csv

# 'ur' and 'uw' are not valid open() modes. Plain read/write text modes with
# newline='' are what the csv module documentation asks for.
with open('test.csv', newline='') as csvin, \
        open('test-write', 'w', newline='') as csvout:
    reader = csv.DictReader(csvin)
    writer = csv.DictWriter(csvout, fieldnames=['Indv', 'AttrName',
                                                'AttrValue', 'Start', 'End'])
    # The requested output starts with a header line.
    writer.writeheader()
    # Every input column other than 'email' is treated as a trait, so the
    # column names are not hard-coded (fieldnames is None for an empty file).
    trait_columns = [name for name in (reader.fieldnames or [])
                     if name != 'email']
    for row in reader:
        for key in trait_columns:
            # Start and End are left out and written as empty fields.
            writer.writerow({'Indv': row['email'], 'AttrName': key,
                             'AttrValue': row[key]})
import pandas as pd

# Reshape the wide input (one column per trait) into one row per
# (email, trait) pair.
pd1 = pd.read_csv('input_csv.csv')
pd2 = (
    pd.melt(
        pd1,
        id_vars=['email'],
        value_vars=['trait1', 'trait2', 'trait3'],
        var_name='AttrName',
        value_name='AttrValue',
    )
    .rename(columns={'email': 'Indv'})
    # sort_values(by=...) replaces the removed DataFrame.sort(columns=...)
    .sort_values(by=['Indv', 'AttrName'])
    .reset_index(drop=True)
)
pd2.to_csv('output_csv.csv', index=False)
Unclear on what the Start and End fields represent, but this gets you everything else.

Categories

Resources