Python issues on character encoding - python

I'm working on a program that need to take two files and merge them and write the union file as a new one. The problem is that the output file contains chars like this \xf0 or if i change some of the encodings the result is something like that \u0028. The input file are codificated in utf8. How can i print on the output file chars like "è" or "ò" and "-"
I have done this code:
import codecs
import pandas as pd
import numpy as np
goldstandard = "..\\files\file1.csv"
tweets = "..\\files\\file2.csv"
with codecs.open(tweets, "r", encoding="utf8") as t:
tFile = pd.read_csv(t, delimiter="\t",
names=['ID', 'Tweet'],
quoting=3)
IDs = tFile['ID']
tweets = tFile['Tweet']
dict = {}
for i in range(len(IDs)):
dict[np.int64(IDs[i])] = [str(tweets[i])]
with codecs.open(goldstandard, "r", encoding="utf8") as gs:
for line in gs:
columns = line.split("\t")
index = np.int64(columns[0])
rowValue = dict[index]
rowValue.append([columns[1], columns[2], columns[3], columns[5]])
dict[index] = rowValue
import pprint
pprint.pprint(dict)
ndic = pprint.pformat(dict, indent=4)
f = codecs.open("out.csv", "w", "utf8")
f.write(ndic)
f.close()
and this is example of the outputs
desired: Beyoncè
obtained: Beyonc\xe9

You are producing Python string literals, here:
import pprint
pprint.pprint(dict)
ndic = pprint.pformat(dict, indent=4)
Pretty-printing is useful for producing debugging output; objects are passed through repr() to make non-printable and non-ASCII characters easily distinguishable and reproducible:
>>> import pprint
>>> value = u'Beyonc\xe9'
>>> value
u'Beyonc\xe9'
>>> print value
Beyoncé
>>> pprint.pprint(value)
u'Beyonc\xe9'
The é character is in the Latin-1 range, outside of the ASCII range, so it is represented with syntax that produces the same value again when used in Python code.
Don't use pprint if you want to write out actual string values to the output file. You'll have to do your own formatting in that case.
Moreover, the pandas dataframe will hold bytestrings, not unicode objects, so you still have undecoded UTF-8 data at that point.
Personally, I'd not even bother using pandas here; you appear to want to write CSV data, so I've simplified your code to use the csv module instead, and I'm not actually bothering to decode the UTF-8 here (this is safe for this case as both input and output is entirely in UTF-8):
import csv
tweets = {}
with open(tweets, "rb") as t:
reader = csv.reader(t, delimiter='\t')
for id_, tweet in reader:
tweets[id_] = tweet
with open(goldstandard, "rb") as gs, open("out.csv", 'wb') as outf:
reader = csv.reader(gs, delimiter='\t')
writer = csv.reader(outf, delimiter='\t')
for columns in reader:
index = columns[0]
writer.writerow([tweets[index]] + columns[1:4] + [columns[5])
Note that you really want to avoid using dict as a variable name; it masks the built-in type, I used tweets instead.

Related

How to export Python list as csv file [duplicate]

I am trying to create a .csv file with the values from a Python list. When I print the values in the list they are all unicode (?), i.e. they look something like this
[u'value 1', u'value 2', ...]
If I iterate through the values in the list i.e. for v in mylist: print v they appear to be plain text.
And I can put a , between each with print ','.join(mylist)
And I can output to a file, i.e.
myfile = open(...)
print >>myfile, ','.join(mylist)
But I want to output to a CSV and have delimiters around the values in the list e.g.
"value 1", "value 2", ...
I can't find an easy way to include the delimiters in the formatting, e.g. I have tried through the join statement. How can I do this?
import csv
with open(..., 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
Edit: this only works with python 2.x.
To make it work with python 3.x replace wb with w (see this SO answer)
with open(..., 'w', newline='') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
Here is a secure version of Alex Martelli's:
import csv
with open('filename', 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
For another approach, you can use DataFrame in pandas:
And it can easily dump the data to csv just like the code below:
import pandas
df = pandas.DataFrame(data={"col1": list_1, "col2": list_2})
df.to_csv("./file.csv", sep=',',index=False)
The best option I've found was using the savetxt from the numpy module:
import numpy as np
np.savetxt("file_name.csv", data1, delimiter=",", fmt='%s', header=header)
In case you have multiple lists that need to be stacked
np.savetxt("file_name.csv", np.column_stack((data1, data2)), delimiter=",", fmt='%s', header=header)
Use python's csv module for reading and writing comma or tab-delimited files. The csv module is preferred because it gives you good control over quoting.
For example, here is the worked example for you:
import csv
data = ["value %d" % i for i in range(1,4)]
out = csv.writer(open("myfile.csv","w"), delimiter=',',quoting=csv.QUOTE_ALL)
out.writerow(data)
Produces:
"value 1","value 2","value 3"
Jupyter notebook
Let's say that your list name is A
Then you can code the following and you will have it as a csv file (columns only!)
R="\n".join(A)
f = open('Columns.csv','w')
f.write(R)
f.close()
You could use the string.join method in this case.
Split over a few of lines for clarity - here's an interactive session
>>> a = ['a','b','c']
>>> first = '", "'.join(a)
>>> second = '"%s"' % first
>>> print second
"a", "b", "c"
Or as a single line
>>> print ('"%s"') % '", "'.join(a)
"a", "b", "c"
However, you may have a problem is your strings have got embedded quotes. If this is the case you'll need to decide how to escape them.
The CSV module can take care of all of this for you, allowing you to choose between various quoting options (all fields, only fields with quotes and seperators, only non numeric fields, etc) and how to esacpe control charecters (double quotes, or escaped strings). If your values are simple, string.join will probably be OK but if you're having to manage lots of edge cases, use the module available.
This solutions sounds crazy, but works smooth as honey
import csv
with open('filename', 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL,delimiter='\n')
wr.writerow(mylist)
The file is being written by csvwriter hence csv properties are maintained i.e. comma separated.
The delimiter helps in the main part by moving list items to next line, each time.
Here is working copy-paste example for Python 3.x with options to define your own delimiter and quote char.
import csv
mylist = ['value 1', 'value 2', 'value 3']
with open('employee_file.csv', mode='w') as employee_file:
employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
employee_writer.writerow(mylist)
This will generate employee_file.csv that looks like this:
"value 1","value 2","value 3"
NOTE:
If quoting is set to csv.QUOTE_MINIMAL, then .writerow() will quote
fields only if they contain the delimiter or the quotechar. This is
the default case.
If quoting is set to csv.QUOTE_ALL, then .writerow() will quote all
fields.
If quoting is set to csv.QUOTE_NONNUMERIC, then .writerow() will quote
all fields containing text data and convert all numeric fields to the
float data type.
If quoting is set to csv.QUOTE_NONE, then .writerow() will escape
delimiters instead of quoting them. In this case, you also must
provide a value for the escapechar optional parameter.
To create and write into a csv file
The below example demonstrate creating and writing a csv file.
to make a dynamic file writer we need to import a package import csv, then need to create an instance of the file with file reference
Ex:- with open("D:\sample.csv","w",newline="") as file_writer
here if the file does not exist with the mentioned file directory then python will create a same file in the specified directory, and "w" represents write, if you want to read a file then replace "w" with "r" or to append to existing file then "a". newline="" specifies that it removes an extra empty row for every time you create row so to eliminate empty row we use newline="", create some field names(column names) using list like fields=["Names","Age","Class"], then apply to writer instance like
writer=csv.DictWriter(file_writer,fieldnames=fields)
here using Dictionary writer and assigning column names, to write column names to csv we use writer.writeheader() and to write values we use writer.writerow({"Names":"John","Age":20,"Class":"12A"}) ,while writing file values must be passed using dictionary method , here the key is column name and value is your respective key value
import csv
with open("D:\\sample.csv","w",newline="") as file_writer:
fields=["Names","Age","Class"]
writer=csv.DictWriter(file_writer,fieldnames=fields)
writer.writeheader()
writer.writerow({"Names":"John","Age":21,"Class":"12A"})
For those looking for less complicated solution. I actually find this one more simplisitic solution that will do similar job:
import pandas as pd
a = ['a','b','c']
df = pd.DataFrame({'a': a})
df= df.set_index('a').T
df.to_csv('list_a.csv', index=False)
Hope this helps as well.
you should use the CSV module for sure , but the chances are , you need to write unicode . For those Who need to write unicode , this is the class from example page , that you can use as a util module:
import csv, codecs, cStringIO
class UTF8Recoder:
"""
Iterator that reads an encoded stream and reencodes the input to UTF-8
"""
def __init__(self, f, encoding):
self.reader = codecs.getreader(encoding)(f)
def __iter__(self):
return self
def next(self):
return self.reader.next().encode("utf-8")
class UnicodeReader:
"""
A CSV reader which will iterate over lines in the CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
f = UTF8Recoder(f, encoding)
self.reader = csv.reader(f, dialect=dialect, **kwds)
def next(self):
row = self.reader.next()
return [unicode(s, "utf-8") for s in row]
def __iter__(self):
return self
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
Here is another solution that does not require the csv module.
print ', '.join(['"'+i+'"' for i in myList])
Example :
>>> myList = [u'value 1', u'value 2', u'value 3']
>>> print ', '.join(['"'+i+'"' for i in myList])
"value 1", "value 2", "value 3"
However, if the initial list contains some ", they will not be escaped. If it is required, it is possible to call a function to escape it like that :
print ', '.join(['"'+myFunction(i)+'"' for i in myList])

Remove 1000's separator from column in CSV?

I have a Python script where I'm importing a csv that has commas in values over 1000. These values are strings in the csv. I need to remove the commas from the values, and convert the strings to rounded floats inside the csv before it's imported into Python.
I've tried appending all the new values to a list to use the csv.writer, but I haven't been able to figure out how to have the writer only replace the values in the column that have commas. Here's what I have so far. :
import csv
RoomReport = r'path_to_csv'
new_values_list = []
f = open(RoomReport, "r")
reader = csv.reader(f)
writer = csv.writer(f)
for row in reader:
useable_area = row[7]
if "," in useable_area:
useable_area_no_comma = useable_area.replace(",","")
useable_area_rounded = int(round(float(useable_area_no_comma)))
new_values_list.append(useable_area_rounded)
f.close()
As I mentioned in a comment, this can only be done if the input csv file is formatted in a way that will allow the commas in the numbers to be differentiated from the commas between each one of them.
Here's an example of one way it could be done (by quoting all the values):
"0","1","2","3","4","5","6","7,123.6","8","9"
"0","1","2","3","4","5","6","1,000","8","9"
"0","1","2","3","4","5","6","20,000","8","9"
Here's code that will do what you want. It uses the locale.atof function to simplify cleaning up the number:
import csv
import locale
# Set local to someplace that uses a comma for the thousands separator.
locale.setlocale(locale.LC_ALL, 'English_US.1252')
RoomReport = r'RoomReport.csv'
cleaned_report = r'RoomReport_cleaned.csv'
new_values_list = []
with open(RoomReport, "r", newline='') as inp:
for row in csv.reader(inp):
if "," in row[7]:
row[7] = int(round(locale.atof(row[7])))
new_values_list.append(row)
# Create cleaned-up output file.
with open(cleaned_report, "w", newline='') as outp:
csv.writer(outp, quoting=csv.QUOTE_ALL).writerows(new_values_list)
The RoomReport_cleaned.csv it creates from the example input will contain this:
"0","1","2","3","4","5","6","7124","8","9"
"0","1","2","3","4","5","6","1000","8","9"
"0","1","2","3","4","5","6","20000","8","9"
Note that since the values in the output no longer have commas embedded in them, the quoting all fields is not longer necessary—so could be left out by not specifying csv.QUOTE_ALL.
maybe something like this?
import re
from sys import stdout
isnum = re.compile('^[0-9, ]+$')
non = re.compile('[, ]')
fd = StringIO()
out = csv.writer(fd)
out.writerow(['foo','1,000,000',19])
out.writerow(['bar','1,234,567',20])
fd.seek(0)
inp = csv.reader(fd)
out = csv.writer(stdout)
for row in inp:
for i, x in enumerate(row):
if isnum.match(x):
row[i] = float(non.sub('', x))
out.writerow(row)

Program that takes a list of numbers 1- 1000, reverses them , puts them into a file (called my_file) and prints them back into my_file [duplicate]

I am trying to create a .csv file with the values from a Python list. When I print the values in the list they are all unicode (?), i.e. they look something like this
[u'value 1', u'value 2', ...]
If I iterate through the values in the list i.e. for v in mylist: print v they appear to be plain text.
And I can put a , between each with print ','.join(mylist)
And I can output to a file, i.e.
myfile = open(...)
print >>myfile, ','.join(mylist)
But I want to output to a CSV and have delimiters around the values in the list e.g.
"value 1", "value 2", ...
I can't find an easy way to include the delimiters in the formatting, e.g. I have tried through the join statement. How can I do this?
import csv
with open(..., 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
Edit: this only works with python 2.x.
To make it work with python 3.x replace wb with w (see this SO answer)
with open(..., 'w', newline='') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
Here is a secure version of Alex Martelli's:
import csv
with open('filename', 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
For another approach, you can use DataFrame in pandas:
And it can easily dump the data to csv just like the code below:
import pandas
df = pandas.DataFrame(data={"col1": list_1, "col2": list_2})
df.to_csv("./file.csv", sep=',',index=False)
The best option I've found was using the savetxt from the numpy module:
import numpy as np
np.savetxt("file_name.csv", data1, delimiter=",", fmt='%s', header=header)
In case you have multiple lists that need to be stacked
np.savetxt("file_name.csv", np.column_stack((data1, data2)), delimiter=",", fmt='%s', header=header)
Use python's csv module for reading and writing comma or tab-delimited files. The csv module is preferred because it gives you good control over quoting.
For example, here is the worked example for you:
import csv
data = ["value %d" % i for i in range(1,4)]
out = csv.writer(open("myfile.csv","w"), delimiter=',',quoting=csv.QUOTE_ALL)
out.writerow(data)
Produces:
"value 1","value 2","value 3"
Jupyter notebook
Let's say that your list name is A
Then you can code the following and you will have it as a csv file (columns only!)
R="\n".join(A)
f = open('Columns.csv','w')
f.write(R)
f.close()
You could use the string.join method in this case.
Split over a few of lines for clarity - here's an interactive session
>>> a = ['a','b','c']
>>> first = '", "'.join(a)
>>> second = '"%s"' % first
>>> print second
"a", "b", "c"
Or as a single line
>>> print ('"%s"') % '", "'.join(a)
"a", "b", "c"
However, you may have a problem is your strings have got embedded quotes. If this is the case you'll need to decide how to escape them.
The CSV module can take care of all of this for you, allowing you to choose between various quoting options (all fields, only fields with quotes and seperators, only non numeric fields, etc) and how to esacpe control charecters (double quotes, or escaped strings). If your values are simple, string.join will probably be OK but if you're having to manage lots of edge cases, use the module available.
This solutions sounds crazy, but works smooth as honey
import csv
with open('filename', 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL,delimiter='\n')
wr.writerow(mylist)
The file is being written by csvwriter hence csv properties are maintained i.e. comma separated.
The delimiter helps in the main part by moving list items to next line, each time.
Here is working copy-paste example for Python 3.x with options to define your own delimiter and quote char.
import csv
mylist = ['value 1', 'value 2', 'value 3']
with open('employee_file.csv', mode='w') as employee_file:
employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
employee_writer.writerow(mylist)
This will generate employee_file.csv that looks like this:
"value 1","value 2","value 3"
NOTE:
If quoting is set to csv.QUOTE_MINIMAL, then .writerow() will quote
fields only if they contain the delimiter or the quotechar. This is
the default case.
If quoting is set to csv.QUOTE_ALL, then .writerow() will quote all
fields.
If quoting is set to csv.QUOTE_NONNUMERIC, then .writerow() will quote
all fields containing text data and convert all numeric fields to the
float data type.
If quoting is set to csv.QUOTE_NONE, then .writerow() will escape
delimiters instead of quoting them. In this case, you also must
provide a value for the escapechar optional parameter.
To create and write into a csv file
The below example demonstrate creating and writing a csv file.
to make a dynamic file writer we need to import a package import csv, then need to create an instance of the file with file reference
Ex:- with open("D:\sample.csv","w",newline="") as file_writer
here if the file does not exist with the mentioned file directory then python will create a same file in the specified directory, and "w" represents write, if you want to read a file then replace "w" with "r" or to append to existing file then "a". newline="" specifies that it removes an extra empty row for every time you create row so to eliminate empty row we use newline="", create some field names(column names) using list like fields=["Names","Age","Class"], then apply to writer instance like
writer=csv.DictWriter(file_writer,fieldnames=fields)
here using Dictionary writer and assigning column names, to write column names to csv we use writer.writeheader() and to write values we use writer.writerow({"Names":"John","Age":20,"Class":"12A"}) ,while writing file values must be passed using dictionary method , here the key is column name and value is your respective key value
import csv
with open("D:\\sample.csv","w",newline="") as file_writer:
fields=["Names","Age","Class"]
writer=csv.DictWriter(file_writer,fieldnames=fields)
writer.writeheader()
writer.writerow({"Names":"John","Age":21,"Class":"12A"})
For those looking for less complicated solution. I actually find this one more simplisitic solution that will do similar job:
import pandas as pd
a = ['a','b','c']
df = pd.DataFrame({'a': a})
df= df.set_index('a').T
df.to_csv('list_a.csv', index=False)
Hope this helps as well.
you should use the CSV module for sure , but the chances are , you need to write unicode . For those Who need to write unicode , this is the class from example page , that you can use as a util module:
import csv, codecs, cStringIO
class UTF8Recoder:
"""
Iterator that reads an encoded stream and reencodes the input to UTF-8
"""
def __init__(self, f, encoding):
self.reader = codecs.getreader(encoding)(f)
def __iter__(self):
return self
def next(self):
return self.reader.next().encode("utf-8")
class UnicodeReader:
"""
A CSV reader which will iterate over lines in the CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
f = UTF8Recoder(f, encoding)
self.reader = csv.reader(f, dialect=dialect, **kwds)
def next(self):
row = self.reader.next()
return [unicode(s, "utf-8") for s in row]
def __iter__(self):
return self
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
Here is another solution that does not require the csv module.
print ', '.join(['"'+i+'"' for i in myList])
Example :
>>> myList = [u'value 1', u'value 2', u'value 3']
>>> print ', '.join(['"'+i+'"' for i in myList])
"value 1", "value 2", "value 3"
However, if the initial list contains some ", they will not be escaped. If it is required, it is possible to call a function to escape it like that :
print ', '.join(['"'+myFunction(i)+'"' for i in myList])

How to replace a list of special characters in a csv in python

I have some csv files that may or may not contain characters like “”à that are undesirable, so I want to write a simple script that will feed in a csv and feed out a csv (or its contents) with those characters replaced with more standard characters, so in the example:
bad_chars = '“”à'
good_chars = '""a'
The problem so far is that my code seems to produce a csv with perhaps the wrong encoding? Any help would be appreciated in making this simpler and/or making sure my output csv doesn't force an incorrect regex encoding--maybe using pandas?
Attempt:
import csv, string
upload_path = sys.argv[1]
input_file = open('{}'.format(upload_path), 'rb')
upload_csv = open('{}_fixed.csv'.format(upload_path.strip('.csv')), 'wb')
data = csv.reader(input_file)
writer = csv.writer(upload_csv, quoting=csv.QUOTE_ALL)
in_chars = '\xd2\xd3'
out_chars = "''"
replace_list = string.maketrans(in_chars, out_chars)
for line in input_file:
line = str(line)
new_line = line.translate(replace_list)
writer.writerow(new_line.split(','))
input_file.close()
upload_csv.close()
As you stamped your question with the pandas tag - here is a pandas solution:
import pandas as pd
(pd.read_csv('/path/to/file.csv')
.replace(r'RegEx_search_for_str', r'RegEx_replace_with_str', regex=True)
.to_csv('/path/to/fixed.csv', index=False)
)

Create a .csv file with values from a Python list

I am trying to create a .csv file with the values from a Python list. When I print the values in the list they are all unicode (?), i.e. they look something like this
[u'value 1', u'value 2', ...]
If I iterate through the values in the list i.e. for v in mylist: print v they appear to be plain text.
And I can put a , between each with print ','.join(mylist)
And I can output to a file, i.e.
myfile = open(...)
print >>myfile, ','.join(mylist)
But I want to output to a CSV and have delimiters around the values in the list e.g.
"value 1", "value 2", ...
I can't find an easy way to include the delimiters in the formatting, e.g. I have tried through the join statement. How can I do this?
import csv
with open(..., 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
Edit: this only works with python 2.x.
To make it work with python 3.x replace wb with w (see this SO answer)
with open(..., 'w', newline='') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
Here is a secure version of Alex Martelli's:
import csv
with open('filename', 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(mylist)
For another approach, you can use DataFrame in pandas:
And it can easily dump the data to csv just like the code below:
import pandas
df = pandas.DataFrame(data={"col1": list_1, "col2": list_2})
df.to_csv("./file.csv", sep=',',index=False)
The best option I've found was using the savetxt from the numpy module:
import numpy as np
np.savetxt("file_name.csv", data1, delimiter=",", fmt='%s', header=header)
In case you have multiple lists that need to be stacked
np.savetxt("file_name.csv", np.column_stack((data1, data2)), delimiter=",", fmt='%s', header=header)
Use python's csv module for reading and writing comma or tab-delimited files. The csv module is preferred because it gives you good control over quoting.
For example, here is the worked example for you:
import csv
data = ["value %d" % i for i in range(1,4)]
out = csv.writer(open("myfile.csv","w"), delimiter=',',quoting=csv.QUOTE_ALL)
out.writerow(data)
Produces:
"value 1","value 2","value 3"
Jupyter notebook
Let's say that your list name is A
Then you can code the following and you will have it as a csv file (columns only!)
R="\n".join(A)
f = open('Columns.csv','w')
f.write(R)
f.close()
You could use the string.join method in this case.
Split over a few of lines for clarity - here's an interactive session
>>> a = ['a','b','c']
>>> first = '", "'.join(a)
>>> second = '"%s"' % first
>>> print second
"a", "b", "c"
Or as a single line
>>> print ('"%s"') % '", "'.join(a)
"a", "b", "c"
However, you may have a problem is your strings have got embedded quotes. If this is the case you'll need to decide how to escape them.
The CSV module can take care of all of this for you, allowing you to choose between various quoting options (all fields, only fields with quotes and seperators, only non numeric fields, etc) and how to esacpe control charecters (double quotes, or escaped strings). If your values are simple, string.join will probably be OK but if you're having to manage lots of edge cases, use the module available.
This solutions sounds crazy, but works smooth as honey
import csv
with open('filename', 'wb') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL,delimiter='\n')
wr.writerow(mylist)
The file is being written by csvwriter hence csv properties are maintained i.e. comma separated.
The delimiter helps in the main part by moving list items to next line, each time.
Here is working copy-paste example for Python 3.x with options to define your own delimiter and quote char.
import csv
mylist = ['value 1', 'value 2', 'value 3']
with open('employee_file.csv', mode='w') as employee_file:
employee_writer = csv.writer(employee_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
employee_writer.writerow(mylist)
This will generate employee_file.csv that looks like this:
"value 1","value 2","value 3"
NOTE:
If quoting is set to csv.QUOTE_MINIMAL, then .writerow() will quote
fields only if they contain the delimiter or the quotechar. This is
the default case.
If quoting is set to csv.QUOTE_ALL, then .writerow() will quote all
fields.
If quoting is set to csv.QUOTE_NONNUMERIC, then .writerow() will quote
all fields containing text data and convert all numeric fields to the
float data type.
If quoting is set to csv.QUOTE_NONE, then .writerow() will escape
delimiters instead of quoting them. In this case, you also must
provide a value for the escapechar optional parameter.
To create and write into a csv file
The below example demonstrate creating and writing a csv file.
to make a dynamic file writer we need to import a package import csv, then need to create an instance of the file with file reference
Ex:- with open("D:\sample.csv","w",newline="") as file_writer
here if the file does not exist with the mentioned file directory then python will create a same file in the specified directory, and "w" represents write, if you want to read a file then replace "w" with "r" or to append to existing file then "a". newline="" specifies that it removes an extra empty row for every time you create row so to eliminate empty row we use newline="", create some field names(column names) using list like fields=["Names","Age","Class"], then apply to writer instance like
writer=csv.DictWriter(file_writer,fieldnames=fields)
here using Dictionary writer and assigning column names, to write column names to csv we use writer.writeheader() and to write values we use writer.writerow({"Names":"John","Age":20,"Class":"12A"}) ,while writing file values must be passed using dictionary method , here the key is column name and value is your respective key value
import csv
with open("D:\\sample.csv","w",newline="") as file_writer:
fields=["Names","Age","Class"]
writer=csv.DictWriter(file_writer,fieldnames=fields)
writer.writeheader()
writer.writerow({"Names":"John","Age":21,"Class":"12A"})
For those looking for less complicated solution. I actually find this one more simplisitic solution that will do similar job:
import pandas as pd
a = ['a','b','c']
df = pd.DataFrame({'a': a})
df= df.set_index('a').T
df.to_csv('list_a.csv', index=False)
Hope this helps as well.
you should use the CSV module for sure , but the chances are , you need to write unicode . For those Who need to write unicode , this is the class from example page , that you can use as a util module:
import csv, codecs, cStringIO
class UTF8Recoder:
"""
Iterator that reads an encoded stream and reencodes the input to UTF-8
"""
def __init__(self, f, encoding):
self.reader = codecs.getreader(encoding)(f)
def __iter__(self):
return self
def next(self):
return self.reader.next().encode("utf-8")
class UnicodeReader:
"""
A CSV reader which will iterate over lines in the CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
f = UTF8Recoder(f, encoding)
self.reader = csv.reader(f, dialect=dialect, **kwds)
def next(self):
row = self.reader.next()
return [unicode(s, "utf-8") for s in row]
def __iter__(self):
return self
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
Here is another solution that does not require the csv module.
print ', '.join(['"'+i+'"' for i in myList])
Example :
>>> myList = [u'value 1', u'value 2', u'value 3']
>>> print ', '.join(['"'+i+'"' for i in myList])
"value 1", "value 2", "value 3"
However, if the initial list contains some ", they will not be escaped. If it is required, it is possible to call a function to escape it like that :
print ', '.join(['"'+myFunction(i)+'"' for i in myList])

Categories

Resources