Modifying file content using python - python

I have a file that consists of a single line:
a,x,b,c,d,e
I want to convert this into
a,x,b,x,c,x,d,x,e,x
Is there any easy way to achieve this with python?

# Read the single comma-separated line, drop any existing 'x' fields, and
# follow every remaining field with an 'x' field.
with open(filename) as my_file:
    # strip() removes the trailing newline so that it does not end up inside
    # the last field
    fields = my_file.read().strip().split(',')
# join() puts the separators only between items, so no trailing comma has to
# be stripped afterwards
result = ','.join(field + ',x' for field in fields if field != 'x')
print(result)

import re
# Rewrite the file in place: keep every single lowercase letter except 'x'
# and follow each one with an 'x' field.
with open(filename) as src:
    s = src.read()
# The file is reopened for writing only after it has been read completely;
# both handles are closed by their with-blocks.
with open(filename, 'w') as dst:
    dst.write(',x,'.join(re.findall(r'[a-wyz]', s)) + ',x\n')

Related

Replace space with underscore using python

I need some help. I have a txt file with spaces between words, and I want to replace each space with an underscore.
# Asker's attempt, kept as posted. The loop calls .replace() on the file
# object (fileHandler) rather than on the current line (eachline), which is
# what raises the AttributeError quoted below.
fileHandler = open('nog_rename_update.txt')
for eachline in fileHandler:
new_name = fileHandler.replace(" ","_")
print(new_name)
That's my code but it keeps throwing error messages
new_name = fileHandler.replace(" ","_")
AttributeError: '_io.TextIOWrapper' object has no attribute 'replace'
example files that I want to remove space and add underscore
Here's a generic approach that should work for you:
teststring = 'hello world this is just a test. don\'t mind me 123.'
# Collapse runs of spaces: keep replacing two consecutive spaces with one
# until no double space remains. (Testing for a single space here would never
# terminate, because the replacement leaves that single space in place.)
while '  ' in teststring:
    teststring = teststring.replace('  ', ' ')
# replace space with underscore (_)
teststring = teststring.replace(' ', '_')
print(teststring)
assert teststring == "hello_world_this_is_just_a_test._don't_mind_me_123."  # True
Using a file example:
fname = 'mah_file.txt'
with open(fname) as in_file:
    contents = in_file.read()
# Collapse runs of spaces into a single space first. The test must look for
# two consecutive spaces; looking for a single space would loop forever.
while '  ' in contents:
    contents = contents.replace('  ', ' ')
# write updated contents back to file
with open(fname, 'w') as out_file:
    out_file.write(contents.replace(' ', '_'))
This opens the files, reads line by line, splits the line into two parts, and combines the two parts with an underscore. I stored it in a list that you can use to do your next step.
# Build a list holding each line of the file with its whitespace-separated
# words joined by underscores.
with open('nog_rename_update.txt') as f:
    new_list = []
    for line in f:
        # split() with no argument splits on any run of whitespace and drops
        # the trailing newline; join() copes with any number of words, where
        # indexing split[0] and split[1] fails on a one-word or blank line
        new_list.append("_".join(line.split()))
# print the list to see results
print(new_list)
#
# add code to loop through the new list and to write to a file
#
Try out this
# Read the whole file, then swap every space for an underscore.
with open('nog_rename_update.txt') as source:
    # despite its name, fileHandler holds the file's text, not the handle
    fileHandler = source.read()
new_name = fileHandler.replace(" ", "_")
print(new_name)
# Copy test.txt to testfile.txt with every space replaced by an underscore.
with open("test.txt", "r") as f:
    text = f.read()
# replace() returns the text unchanged when it contains no spaces, so no
# membership check is needed; guarding on one left text2 empty and produced
# an empty output file for space-free input
text2 = text.replace(' ', '_')
print(text2)
with open("testfile.txt", "w+") as f:
    f.write(text2)
Here is another, less verbose solution. Simply use re.sub:
import re
file_name = r"D:\projects\playground\python\data\test.txt"
# Print each line with every run of spaces collapsed to one underscore.
with open(file_name, "r") as file:
    for line in file:
        # end="" because each line read from the file keeps its own newline
        print(re.sub("( )+", "_", line), end="")
And if you want to replace the spaces in your text file:
import re
file_name = r"D:\projects\playground\python\data\test.txt"
# Convert every line while reading, so the file can be reopened for writing
# once the with-block has closed it.
with open(file_name, "r") as file:
    lines = [re.sub("( )+", "_", line) for line in file]
# Overwrite the same file with the converted lines; each line still ends with
# its original newline, so writelines() needs no separator.
with open(file_name, "w") as file:
    file.writelines(lines)
Or use fileinput:
import re
import fileinput
file_name = r"D:\projects\playground\python\data\test.txt"
# inplace=True redirects print() into the file being read; backup=".bak"
# keeps a copy of the original file next to it.
with fileinput.FileInput(file_name, inplace=True, backup=".bak") as file:
    for line in file:
        # end="" because each line read from the file keeps its own newline
        print(re.sub("( )+", "_", line), end="")

Keeping the quotes when reading CSV file in python

Trouble with a really annoying homework. I have a csv-file with lots of comma-delimited fields per row. I need to take the last two fields from every row and write them into a new txt-file. The problem is that some of the latter fields have sentences, those with commas are in double quotes, those without them aren't. For example:
180,easy
240min,"Quite easy, but number 3, wtf?"
300,much easier than the last assignment
I did this and it worked just fine, but the double quotes disappear. The assignment is to copy the fields to the txt-file, use semicolon as delimiter and remove possible line breaks. The text must remain exactly the same. We have an automatic check system, so it's no use arguing if this makes any sense.
import csv
# Asker's version, kept as posted: rows are parsed with csv.reader, and the
# last two fields of each row are written to mytxt.txt separated by ';'.
file = open('myfile.csv', 'r')
output= open('mytxt.txt', 'w')
csvr = csv.reader(file)
# consume the first row (stored as headline, not used again in this snippet)
headline = next(csvr)
for line in csvr:
lgt = len(line)
# last two fields of the row, with embedded line breaks removed
time = line[lgt - 2].replace('\n', '')
feedb = line[lgt - 1].replace('\n', '')
# rows where either field is empty are not written
if time != '' and feedb != '':
output.write(time + ';' + feedb + '\n')
output.close()
file.close()
Is there some easy solution for this? Can I use csv module at all? No one seems to have exactly the same problem.
Thank you all beforehand.
Try this,
import csv
# Copy the last two fields of every data row to mytxt.txt, separated by a
# semicolon, putting double quotes back around feedback that contains a comma.
# Both files are closed by the with-block, even if a row raises.
with open('myfile.csv', 'r') as file, open('mytxt.txt', 'w') as output:
    csvr = csv.reader(file)
    headline = next(csvr)  # consume the first row
    for line in csvr:
        lgt = len(line)
        # last two fields of the row, with embedded line breaks removed
        time = line[lgt - 2].replace('\n', '')
        feedb = line[lgt - 1].replace('\n', '')
        # rows where either field is empty are not written
        if time != '' and feedb != '':
            # the parsed value no longer carries its quotes; a comma in the
            # value is taken as the sign that the field was quoted in the input
            if ',' in feedb:
                output.write(time + ';"' + feedb + '"\n')
            else:
                output.write(time + ';' + feedb + '\n')
Had to do it the ugly way, the file was too irrational. Talked with some colleagues on the same course and apparently the idea was NOT to use csv module here, but to rehearse basic file handling in Python.
# Plain file handling instead of the csv module: collect [time, feedback]
# pairs from the last two comma-separated items of each record line, then
# write them to mytxt.txt as "time;feedback" lines.
# NOTE(review): the original indentation was lost, so the exact nesting of
# the statements below cannot be read off this listing.
file = open('myfile.csv','r')
output = open('mytxt.txt', 'w')
# read the first line separately so the loop below starts after it
headline = file.readline()
feedb_lst = []
count = 0
for line in file:
if line.startswith('1'): #found out all lines should start with an ID number,
# at most 16 splits: everything after the 16th comma stays in the last item
data_lst = line.split(',', 16) #that always starts with '1'
lgt = len(data_lst)
time = data_lst[lgt - 2]
feedb = data_lst[lgt - 1].rstrip()
feedback = [time, feedb]
feedb_lst.append(feedback)
count += 1
else:
# a line that does not start with '1' is appended to the feedback text of
# the most recently stored record
# NOTE(review): if such a line comes before any record line, feedb_lst is
# still empty and this indexing fails - confirm the input rules that out
feedb_lst[count - 1][1] = feedb_lst[count - 1][1] + line.rstrip()
i = 1
for item in feedb_lst:
# entries where either part is empty are not written
if item[0] != '' and item[1] != '':
# when i equals the list length (presumably the final entry) no trailing
# newline is written
if i == len(feedb_lst):
output.write(item[0] + ';' + item[1])
else:
output.write(item[0] + ';' + item[1] + '\n')
i += 1
output.close()
file.close()
Thank you for your help!

How to add list values into a JSON file

How to add list values into a JSON file. Here is my code :
import os,string
# Drive names ('A:' .. 'Z:') that exist on this machine. Iterating over
# string.ascii_uppercase covers all 26 letters; range(65, 90) stopped at 'Y'
# because the end of a range is exclusive, so 'Z:' was never checked.
drives_a = [
    letter + ':'
    for letter in string.ascii_uppercase
    if os.path.exists(letter + ':')
]
Does this work for you?
import json
lst = [1,2,3]
file_name = 'test.json'
# json.dump serialises the list straight into the open file; the with-block
# closes the file once the data is written.
with open(file_name, 'w') as f:
    json.dump(lst, f)

lxml encoding changes while appending string to list

I'm using lxml and etree to parse an html-file. The code looks like this:
# Asker's version, kept as posted: downloads the page at the WOLD vocabulary
# URL and returns the result of etree.HTML() on the downloaded data. No
# parser or encoding is passed to etree.HTML here.
def get_all_languages():
allLanguages = "http://wold.livingsources.org/vocabulary"
# f is whatever urlopen(...).read() returns for that URL
f = urllib.urlopen(allLanguages).read()
#inFile = "imLog.xml"
#html = f.read()
#f.close()
#encoding = chardet.detect(f)['encoding']
#f.decode(encoding, 'replace').encode('utf-8')
html = etree.HTML(f)
#result = etree.tostring(html, pretty_print=True, method="html")
#print result #is of type string
#print type(result)
return html
Then, I'm extracting some information from the website and saving it in an array.
While appending the string to an array, the encoding or format of the string changes. I think it is some kind of unicode object or so??
So I thought it might not be challenging for me, because I remove it from the array and print it in an output file.
# Asker's version, kept as posted: writes the values returned by
# get_data_all_languages() and get_site_single_lang() to WOLDDictionaries.txt,
# each value followed by a tab.
# NOTE(review): the original indentation was lost, so the exact nesting of
# the loops below cannot be read off this listing.
def print_file():
#outputfile
output = 'WOLDDictionaries.txt'
dataLanguage = get_data_all_languages()
dataDictionaries = get_site_single_lang()
outputFile = open(output, "w")
outputFile.flush()
for index, array in enumerate(dataLanguage):
# remember the position of the current language entry
indexLang = index
for item in array:
string = item
#indexLang = index
outputFile.write(string + "\t")
outputFile.write("\n")
#outputFile.flush()
for index, array in enumerate(dataDictionaries):
#stringArray = str(array)
indexDic = index
#outputFile.write(index + stringArray + "\t")
# only the dictionary entry at the same position as the language is written
if(indexLang == indexDic):
#outputFile.write(string + "\t")
for data in array:
#stringData = str(data)
#outputFile.write(stringData + "\t")
for word in data:
stringWord = word
outputFile.write(stringWord + "\t")
outputFile.write("\n")
#outputFile.flush()
outputFile.close()
Well, this thought was wrong. While printing it to a file, the encoding is still wrong.
What can I do to get the right characters?
Ok I found the problem. It was in the parser.
I changed the encoding in the parser and got the right encoding in the end.
The method now looks like this:
myparser = etree.HTMLParser(encoding = 'utf-8')
html = etree.HTML(f, parser=myparser)
#print etree.tostring(xml, pretty_print=True, method="xml")
#print type(xml)
return html

Problems with Python's file.write() method and string handling

The problem I am having at this point in time (being new to Python) is writing strings to a text file. The issue I'm experiencing is one where either the strings don't have linebreaks inbetween them or there is a linebreak after every character. Code to follow:
import string, io
# Asker's first version, kept as posted: for each input line, keep the part
# after the first ": " and write it to TempFile.txt. Nothing writes a line
# break, so the values run together in the output.
FileName = input("Arb file name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
m_line = m_line.strip()
# split once at the first ": "; the result is a list of one or two items
m_line = m_line.split(": ", 1)
# drop the part before the separator when the line contained one
if len(m_line) > 1:
del m_line[0]
#print(m_line)
# str() of a list gives text such as "['Jargon']"; strip() then removes the
# quote and bracket characters from both ends
MyString = str(m_line)
MyString = MyString.strip("'[]")
TempFile.write(MyString)
MyFile.close()
TempFile.close()
My input looks like this:
1 Jargon
2 Python
3 Yada Yada
4 Stuck
My output when I do this is:
JargonPythonYada YadaStuck
I then modify the source code to this:
import string, io
# Asker's second version, kept as posted: same steps as before, but the value
# is written via '\n'.join(MyString). Joining a string treats it as a sequence
# of characters, so a line break is placed between every character.
FileName = input("Arb File Name (.txt): ")
MyFile = open(FileName, 'r')
TempFile = open('TempFile.txt', 'w', encoding='UTF-8')
for m_line in MyFile:
m_line = m_line.strip()
# split once at the first ": "; the result is a list of one or two items
m_line = m_line.split(": ", 1)
# drop the part before the separator when the line contained one
if len(m_line) > 1:
del m_line[0]
#print(m_line)
MyString = str(m_line)
MyString = MyString.strip("'[]")
#print(MyString)
TempFile.write('\n'.join(MyString))
MyFile.close()
TempFile.close()
Same input and my output looks like this:
J
a
r
g
o
nP
y
t
h
o
nY
a
d
a
Y
a
d
aS
t
u
c
k
Ideally, I would like each of the words to appear on a separate line without the numbers in front of them.
Thanks,
MarleyH
You have to write the '\n' after each line, since you're stripping the original '\n';
Your idea of using '\n'.join() doesn't work because it will use \n to join the string, inserting it between each char of the string. You need a single \n after each name, instead.
import string, io
FileName = input("Arb file name (.txt): ")
# Write the text after the first ": " of every input line to TempFile.txt,
# one value per line. Both files are closed by their with-blocks.
with open(FileName, 'r') as MyFile:
    with open('TempFile.txt', 'w', encoding='UTF-8') as TempFile:
        for line in MyFile:
            # [-1] is the part after ": ", or the whole line when the
            # separator is missing (indexing [1] raises IndexError there)
            value = line.strip().split(": ", 1)[-1]
            # strip() removed the original line break, so add one back
            TempFile.write(value + '\n')
fileName = input("Arb file name (.txt): ")
tempName = 'TempFile.txt'
# Copy the text after the first ": " of each line (or the whole line when
# there is no separator) to the temp file, one value per line.
with open(fileName) as inf, open(tempName, 'w', encoding='UTF-8') as outf:
    for line in inf:
        line = line.strip().split(": ", 1)[-1]
        # write() adds no line break of its own
        outf.write(line + '\n')
Problems:
the result of str.split() is a list (this is why, when you cast it to str, you get ['my item']).
write does not add a newline; if you want one, you have to add it explicitly.

Categories

Resources