tab delimited to csv - python

I am able to get this to output the results of my MySQL command (which I have removed for security); however, I keep getting an error when I try to write this tab-delimited output to a CSV. Any help for a Python rookie would be appreciated.
#!/usr/bin/python
import sys, csv
import MySQLdb
import os
import mysql.connector
import subprocess
import string

if __name__ == '__main__':
    du = sys.argv[1]
    csv_home = '/home/oatey/bundle_' + du + '.csv'
    input = sys.stdin
    output = sys.stdout

    # read and rewrite to file with argument
    new = open("/home/oatey/valid.sql2", "w")
    with open("/home/oatey/bundle.sql") as write_query:
        #read_file = write_query.read()
        for line in write_query:
            lr = line.replace('{$$}', du)
            print lr
            new.write(lr)
    new.close()
    write_query.close()

    with open("/home/oatey/valid.sql2") as w:
        mysql_output = subprocess.check_output(MYSQL_COMMAND, stdin=w)
        #print mysql_output

    b = open("/home/oatey/" + du + ".txt", "r+")
    #",".join("%s" % i for i in mysql_output
    b.write(mysql_output)
    print mysql_output
    b.close()

    # read tab-delimited file
    with open("/home/oatey/" + du + ".txt", 'rb') as data:
        cr = data.readlines()
        contents = [line for line in cr]

    with open("/home/oatey/" + du + ".csv", "wb") as wd:
        cw = csv.writer(wd, quotechar='', quoting=csv.QUOTE_NONE)
        wd.write(contents)

I bet the error you are getting is:
TypeError: must be string or buffer, not list
contents is a list; you cannot write a list via write(). Quoting the docs:
file.write(str)
Write a string to the file.
Instead, use csvwriter.writerows():
with open("/home/oatey/" + du + ".csv", "wb") as wd:
cw = csv.writer(wd, quotechar='', quoting=csv.QUOTE_NONE)
cw.writerows(contents)
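Note that contents here is still a list of raw lines, so writerows() would treat each line as a sequence of characters. Since the goal is tab-delimited to CSV, each line probably needs to be split on tabs first. A minimal sketch, reusing the du variable from your script and assuming single tabs separate the fields:
import csv

# Assumption: each line of the MySQL output is one record with tab-separated fields.
with open("/home/oatey/" + du + ".txt", "rb") as data:
    rows = [line.rstrip("\r\n").split("\t") for line in data]

with open("/home/oatey/" + du + ".csv", "wb") as wd:
    cw = csv.writer(wd)
    cw.writerows(rows)  # writerows expects an iterable of row sequences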

Related

Reading from a text file, parsing it, then converting it to a csv

I have this text file, that contains user information. I want to parse the data, so I only have the username, and then I want to create a csv file with that parsed data.
This is the text file my script is reading from.
blah.com\user1:dajlfnadjhlasdjasnasjlfn:test1
blah.com\user2:dajlfnadjhlasdjasnasjlfn:test2
blah.com\user3:dajlfnadjhlasdjasnasjlfn:test3
blah.com\user4:dajlfnadjhlasdjasnasjlfn:test4
blah.com\user5:dajlfnadjhlasdjasnasjlfn:test5
blah.com\user6:dajlfnadjhlasdjasnasjlfn:test6
Here is my script
import time, os, os.path, sys, string, datetime, time, shutil, csv

#Locate the file
globalpath = 'C:\\users\\userinfo\\'
todaysdatefull = datetime.datetime.now()
todaysdate = todaysdatefull.strftime("%Y-%m-%d")
datapath = globalpath + 'data\\' + todaysdate + "\\"
logfile = datapath + 'userinfo.txt'
potfile = datapath + 'parsed.csv'
infile = logfile
outfile = potfile
lines = []

# Open the file, find the username and parse it
with open(infile, 'r') as f:
    for line in f:
        usernamestart = line.find('\\')
        usernameend = line.find(':')
        username = line[usernamestart+1:usernameend]
        lines.append(username)
        print(username)

# Output the data as a csv file
with open(outfile, 'w') as csv:
    writer = csv.writer(csv)
    for i in range(len(lines)):
        writer.writerow(('Username', 'Date'))
        writer.writerow(lines[i])
Result:
Traceback (most recent call last):
File "C:\Automation\autocrack\highrisk_parser.py", line 33, in <module>
writer = csv.writer(csv)
AttributeError: 'file' object has no attribute 'writer'
It is coming from this line: with open(outfile, 'w') as csv:. Naming the file handle csv shadows the csv module you imported. Rename the handle you write to, like this:
with open(outfile, 'w') as csv_to_write:
    writer = csv.writer(csv_to_write)
    # Write the header once.
    writer.writerow(('Username', 'Date'))
    for one_line in lines:
        # writerow needs a sequence; passing a bare string makes it treat
        # each character as a separate field.
        writer.writerow((one_line, ''))
The first part of your code, finding the username, can be done as follows:
with open(infile, 'r') as f:
    lines = [line.split('\\')[-1].split(':')[0] for line in f]
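Putting the two suggestions together, a minimal end-to-end sketch (assuming the same infile and outfile variables from the question):
import csv

# infile and outfile are the paths defined in the question's script.
# Parse the usernames: take the part after the backslash and before the colon.
with open(infile, 'r') as f:
    lines = [line.split('\\')[-1].split(':')[0] for line in f]

# Write the CSV: header once, then one row per username.
with open(outfile, 'w') as csv_to_write:
    writer = csv.writer(csv_to_write)
    writer.writerow(['Username', 'Date'])
    for one_line in lines:
        writer.writerow([one_line, ''])  # empty Date column, as above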

Using Python to populate a csv file

The following piece of code creates a CSV file, but every other line is blank. How can I prevent these linebreaks from happening?
import datetime
import time
import csv

i = 0
while i < 10:
    TempProbe = "78.12"
    CurrentTime = time.strftime("%x")
    CurrentDate = time.strftime("%I:%M:%S")
    stringAll = TempProbe + "," + CurrentTime + "," + CurrentDate
    print(stringAll)
    file = open("outFile.csv", "a")
    csvWriter = csv.writer( file )
    csvWriter.writerow( [TempProbe, CurrentTime,CurrentDate] )
    file.close()
    i = i + 1
    time.sleep(1)
This is probably because the default line terminator is '\r\n'. You can correct this by passing lineterminator='\n' to your csv.writer object, like so:
csvWriter = csv.writer(file, lineterminator='\n')
P.S. Move this line out of your while loop to avoid destroying and recreating the file writer object.
You need to set the lineterminator for your csvWriter, as in the code below.
csvWriter = csv.writer(file, lineterminator='\n')
For an explanation why, see: CSV file written with Python has blank lines between each row
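For what it's worth, if this ever runs under Python 3, the usual fix for the same symptom is to open the file with newline='' rather than setting lineterminator. A minimal sketch using the variables from the question:
import csv

# Python 3 only: newline='' lets the csv module control line endings itself,
# which avoids the blank rows on Windows. TempProbe, CurrentTime and
# CurrentDate are the variables from the question's loop.
with open("outFile.csv", "a", newline='') as f:
    csvWriter = csv.writer(f)
    csvWriter.writerow([TempProbe, CurrentTime, CurrentDate])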
You can simply use the open function to write a csv file:
file = open("outFile.csv", "a")
file.write(stringAll+'\n')
Also, you should take the file open and close functions out of the loop.
Use 'ab' instead of 'a' (open the file in binary append mode); that should fix the problem:
file = open("outFile.csv", "ab")
csvWriter = csv.writer(file, lineterminator='\n' )
Or this, so you don't need to open/close the file on every write:
file = open("outFile.csv", "ab")
csvWriter = csv.writer(file, lineterminator='\n' )
i = 0
while i < 10:
TempProbe = "78.12"
CurrentTime = time.strftime("%x")
CurrentDate = time.strftime("%I:%M:%S")
stringAll = TempProbe + "," + CurrentTime + "," + CurrentDate
print(stringAll)
csvWriter.writerow( [TempProbe, CurrentTime,CurrentDate] )
i = i + 1
time.sleep(1)
file.close()

Python 2.7 CSV writer issue

I have some Python code that lists pull requests in Github. If I print the parsed json output to the console, I get the expected results, but when I output the parsed json to a csv file, I'm not getting the same results. They are cut off after the sixth result (and that varies).
What I'm trying to do is overwrite the csv each time with the latest output.
Also, I'm dealing with Unicode output, which is why I use unicodecsv; I don't know if this is throwing the csv output off.
I will list both instances of the relevant piece of code with the print statement and with the csv code.
Thanks for any help.
import sys
import codecs
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
sys.stderr = codecs.getwriter('utf8')(sys.stderr)
import csv
import unicodecsv

for pr in result:
    data = pr.as_dict()
    changes = (gh.repository('my-repo', repo).pull_request(data['number'])).as_dict()
    if changes['commits'] == 1 and changes['changed_files'] == 1:
        #keep print to console for testing purposes
        print "Login: " + changes['user']['login'] + '\n' + "Title: " + changes['title'] + '\n' + "Changed Files: " + str(changes['changed_files']) + '\n' + "Commits: " + str(changes['commits']) + '\n'
With csv:
import sys
import codecs
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
sys.stderr = codecs.getwriter('utf8')(sys.stderr)
import csv
import unicodecsv

for pr in result:
    data = pr.as_dict()
    changes = (gh.repository('my-repo', repo).pull_request(data['number'])).as_dict()
    if changes['commits'] == 1 and changes['changed_files'] == 1:
        with open('c:\pull.csv', 'r+') as f:
            csv_writer = unicodecsv.writer(f, encoding='utf-8')
            csv_writer.writerow(['Login', 'Title', 'Changed files', 'Commits'])
            for i in changes['user']['login'], changes['title'], str(changes['changed_files']), str(changes['commits']):
                csv_writer.writerow([changes['user']['login'], changes['title'], changes['changed_files'], changes['commits']])
The problem is with the way you write data to the file.
Every time you open the file in r+ mode, the file pointer starts at the beginning, so you overwrite the rows written on previous iterations.
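A minimal sketch of the fix: open the file once before the loop, in write mode so each run overwrites the previous output, and write the header a single time.
import unicodecsv

# result, gh and repo are the objects from the question's surrounding code.
with open('c:\\pull.csv', 'wb') as f:
    csv_writer = unicodecsv.writer(f, encoding='utf-8')
    csv_writer.writerow(['Login', 'Title', 'Changed files', 'Commits'])
    for pr in result:
        data = pr.as_dict()
        changes = gh.repository('my-repo', repo).pull_request(data['number']).as_dict()
        if changes['commits'] == 1 and changes['changed_files'] == 1:
            csv_writer.writerow([changes['user']['login'], changes['title'],
                                 changes['changed_files'], changes['commits']])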

Adding filename to last column in csv using python

I have a folder full of .mpt files, each of them having the same data format.
I need to delete the first 57 lines from all files and append these files into one csv - output.csv.
I have that section already:
import glob
import os

dir_name = 'path name'
lines_to_ignore = 57
input_file_format = '*.mpt'
output_file_name = "output.csv"

def convert():
    files = glob.glob(os.path.join(dir_name, input_file_format))
    with open(os.path.join(dir_name, output_file_name), 'w') as out_file:
        for f in files:
            with open(f, 'r') as in_file:
                content = in_file.readlines()
                content = content[lines_to_ignore:]
                for i in content:
                    out_file.write(i)
            print("working")

convert()
print("done")
This part works ok.
How do I add the filename of each .mpt file as the last column of output.csv?
Thank you!
This is a quick 'n dirty solution.
In this loop the variable i is just a string (a line from a CSV file):
for i in content:
    out_file.write(i)
So you just need to strip off the end-of-line character(s) (either "\n" or "\r\n") and append "," followed by the filename.
If you're using Unix, try:
for i in content:
    i = i.rstrip("\n") + "," + output_file_name + "\n"
    out_file.write(i)
This assumes that the field separator is a comma. Another option is:
for i in content:
    i = i.rstrip() + "," + output_file_name
    print >>out_file, i
This will strip all white space from the end of i.
Add quotes if you need to quote the output file name:
i = i.rstrip(...) + ',"' + output_file_name + '"'
The relevant part:
with open(f, 'r') as in_file:
    content = in_file.readlines()
    content = content[lines_to_ignore:]
    for i in content:
        new_line = ",".join([i.rstrip(), f]) + "\n"  # <-- this is new
        out_file.write(new_line)                     # <-- this is new
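One small refinement to consider: f here is the full path returned by glob, so if only the bare filename should appear in the last column, os.path.basename can trim it. A sketch of the same loop with that change:
import os

# f and content come from the loop in the question's convert() function.
for i in content:
    # os.path.basename drops the directory part, leaving e.g. "sample.mpt"
    new_line = ",".join([i.rstrip(), os.path.basename(f)]) + "\n"
    out_file.write(new_line)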

Problems with Python's file.write() method and string handling

The problem I'm having (being new to Python) is writing strings to a text file. Either the strings have no line breaks in between them, or there is a line break after every character. Code to follow:
import string, io

FileName = input("Arb file name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
    m_line = m_line.strip()
    m_line = m_line.split(": ", 1)
    if len(m_line) > 1:
        del m_line[0]
    #print(m_line)
    MyString = str(m_line)
    MyString = MyString.strip("'[]")
    TempFile.write(MyString)
MyFile.close()
TempFile.close()
My input looks like this:
1 Jargon
2 Python
3 Yada Yada
4 Stuck
My output when I do this is:
JargonPythonYada YadaStuck
I then modify the source code to this:
import string, io

FileName = input("Arb File Name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
    m_line = m_line.strip()
    m_line = m_line.split(": ", 1)
    if len(m_line) > 1:
        del m_line[0]
    #print(m_line)
    MyString = str(m_line)
    MyString = MyString.strip("'[]")
    #print(MyString)
    TempFile.write('\n'.join(MyString))
MyFile.close()
TempFile.close()
Same input and my output looks like this:
J
a
r
g
o
nP
y
t
h
o
nY
a
d
a
Y
a
d
aS
t
u
c
k
Ideally, I would like each of the words to appear on a separate line without the numbers in front of them.
Thanks,
MarleyH
You have to write the '\n' after each line, since you're stripping the original '\n'.
Your idea of using '\n'.join() doesn't work because join inserts '\n' between every character of the string. You need a single '\n' after each name instead.
import string, io

FileName = input("Arb file name (.txt): ")
with open(FileName, 'r') as MyFile:
    with open('TempFile.txt', 'w', encoding='UTF-8') as TempFile:
        for line in MyFile:
            line = line.strip().split(": ", 1)
            TempFile.write(line[1] + '\n')
fileName = input("Arb file name (.txt): ")
tempName = 'TempFile.txt'
with open(fileName) as inf, open(tempName, 'w', encoding='UTF-8') as outf:
    for line in inf:
        line = line.strip().split(": ", 1)[-1]
        #print(line)
        outf.write(line + '\n')
Problems:
the result of str.split() is a list (this is why, when you cast it to str, you get ['my item']).
write does not add a newline; if you want one, you have to add it explicitly.
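A tiny demonstration of both points, assuming the real input lines look like "4: Stuck" and writing to a hypothetical demo.txt:
# "4: Stuck" and demo.txt are illustrative stand-ins, not from the question.
parts = "4: Stuck".split(": ", 1)
print(parts)  # ['4', 'Stuck'] -- split() returns a list, not a string

with open('demo.txt', 'w') as out:
    out.write(parts[1])          # writes "Stuck" with no trailing newline
    out.write(parts[1] + '\n')   # the newline has to be added explicitly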
