Save the loop output into csv file - python

I want to save the loop result into a csv file or dataframe; the below code just writes the tweets to the console.
j =1
sortedDF = tweets_df.sort_values(by = ['Polarity'])
for i in range (0, sortedDF.shape[0]):
if(sortedDF['Analysis'][i] == 'Positive'):
print(str(j)+')'+ sortedDF['transalted'][i])
print()
j = j+1

with open("some.csv", "w") as f:
j = 1
sortedDF = tweets_df.sort_values(by=['Polarity'])
for i in range(0, sortedDF.shape[0]):
if (sortedDF['Analysis'][i] == 'Positive'):
f.write(str(j) + ')' + sortedDF['transalted'][i])
print()
j = j + 1

We can call writelines at the end instead of write in for loop which is more optimized solution.
sortedDF = tweets_df.sort_values(by=['Polarity'])
file = open("positive_tweets.csv", "w")
lines = []
j = 1
for i in range(0, sortedDF.shape[0]):
if (sortedDF['Analysis'][i] == 'Positive'):
lines.append(str(j) + ')' + sortedDF['transalted'][i])
j += 1
file.writelines(lines)
file.close()

Related

How to split file with certain conditions for each end line of each file

I have a .txt file like this:
2019-03-29 12:03:07 line1
line2
line3
....
2019-03-30 07:05:09 line1
line2
....
2019-03-31 10:03:20 line1
line2
....
I split the file into several files, like this:
inputData = 'dirname\..'
numThrd = 3
def chunkFiles():
nline = sum(1 for line in open(inputData,'r', encoding='utf-8', errors='ignore'))
chunk_size = math.floor(nline/int(numThrd))
n_thread = int(numThrd)
j = 0
with open(inputData,'r', encoding='utf-8', errors='ignore') as fileout:
for i, line in enumerate(fileout):
if (i + 1 == j * chunk_size and j != n_thread) or i == nline:
out.close()
if i + 1 == 1 or (j != n_thread and i + 1 == j * chunk_size):
chunkFile = 'rawData' + str(j+1) + '.txt'
if os.path.isfile(chunkFile ):
break
out = open(chunkFile , 'w+', encoding='utf-8', errors='ignore')
j = j + 1
fLine = line[:-1]
if not matchLine:
if out.closed != True:
out.write(line)
if i % 1000 == 0 and i != 0:
print ('Processing line %i ...' % (i))
However, I want the split file to meet the condition that the last line in the chunk file must be right before the line that has the date.
recent output that I got:
rawData1.txt
2019-03-29 12:03:07 line1
line2
....
-------------------------
rawData2.txt
line50
line51
2019-03-30 07:05:09 line1
line2
.....
Desired output:
rawData1.txt
2019-03-29 12:03:07 line1
line2
line3
....
-------------------------
rawData2.txt
2019-03-30 07:05:09 line1
line2
....
what should I add to the script above to meet that conditions?
Thank you very much
You can produce the desired output by using a list to hold the lines you want to write (see below).
def write_chunk(filename, chunk):
with open(filename, "w") as out:
for i in chunk:
out.write(i)
chunk = []
n_chunk = 1
with open("data.txt") as f:
for line in f:
if not line[0].isspace() and chunk:
write_chunk("{}.txt".format(n_chunk), chunk)
chunk = []
n_chunk += 1
chunk.append(line)
# write final chunk
write_chunk("{}.txt".format(n_chunk), chunk)

Python Count paragraph

Hello all so i've been tasked to count lines and paragraphs. Counting every line is obviously easy but im stuck on counting the paragraphs. If a paragraph has no character it will give back the number zero and for every paragraph is an increment higher. For example an input file is: Input and an Output should come out Output
so my code is:
def insert_line_para_nums(infile, outfile):
f = open(infile, 'r')
out = open(outfile, 'w')
linecount = 0
for i in f:
paragraphcount = 0
if '\n' in i:
linecount += 1
if len(i) < 2: paragraphcount *= 0
elif len(i) > 2: paragraphcount = paragraphcount + 1
out.write('%-4d %4d %s' % (paragraphcount, linecount, i))
f.close()
out.close()
def insert_line_para_nums(infile, outfile):
f = open(infile, 'r')
out = open(outfile, 'w')
linecount = 0
paragraphcount = 0
empty = True
for i in f:
if '\n' in i:
linecount += 1
if len(i) < 2:
empty = True
elif len(i) > 2 and empty is True:
paragraphcount = paragraphcount + 1
empty = False
if empty is True:
paragraphnumber = 0
else:
paragraphnumber = paragraphcount
out.write('%-4d %4d %s' % (paragraphnumber, linecount, i))
f.close()
out.close()
This is one way to do it, and not the prettiest.
import re
f = open('a.txt', 'r')
paragraph = 0
lines = f.readlines()
for idx, line in enumerate(lines):
if not line == '\n':
m = re.search(r'\w', line)
str = m.group(0)
try:
# if the line is a newline, and the previous line has a str in it, then
# count it as a paragraph.
if line == '\n' and str in lines[idx-1]:
paragraph +=1
except:
pass
if lines[-1] != '\n': # if the last line is not a new line, count a paragraph.
paragraph +=1
print paragraph

Python, sorting numbers error

def selectionSort(lst):
with lst as f:
nums = [int(line) for line in f]
for i in range(len(nums) - 1, 0, -1):
maxPos = 0
for position in range(1, i + 1):
if nums[position] > nums[maxPos]:
maxPos = position
value = nums[i]
nums[i] = nums[maxPos]
nums[maxPos] = value
def main():
textFileName = input("Enter the Filename: ")
lst = open(textFileName)
selectionSort(lst)
print(lst)
main()
Okay, thanks to hcwhsa for helping me out with the reading file and putting them all in one line.
When I run that code, i get this following error:
<_io.TextIOWrapper name='numbers.txt' mode='r' encoding='UTF-8'>
textfile:
67
7
2
34
42
Any help? Thanks.
You should return the list from the function and assign it to a variable and then print it.
def selectionSort(lst):
with lst as f:
nums = [int(line) for line in f]
...
...
return nums
sorted_lst = selectionSort(lst)
print(sorted_lst)
Your code didn't work because instead of passing the list you passed the file object to the function. This version of your code passes the list to the function, so no return value is required as you're modifying the same list object:
def selectionSort(nums):
for i in range(len(nums) - 1, 0, -1):
maxPos = 0
for position in range(1, i + 1):
if nums[position] > nums[maxPos]:
maxPos = position
value = nums[i]
nums[i] = nums[maxPos]
nums[maxPos] = value
def main():
textFileName = input("Enter the Filename: ")
with open(textFileName) as f:
lst = [int(line) for line in f]
selectionSort(lst)
print(lst)
main()

Python Write To File Missing Lines

I'm having trouble using python to write strings into a file:
(what I'm trying to do is using python to generate some C programs)
The code I have is the following:
filename = "test.txt"
i = 0
string = "image"
tempstr = ""
average1 = "average"
average2 = "average*average"
output = ""
FILE = open(filename,"w")
while i < 20:
j = 0
output = "square_sum = square_sum + "
while j < 20:
tempstr = string + "_" + str(i) + "_" + str(j)
output = output + tempstr + "*" + tempstr + " + " + average2 + " - 2*" + average1 + "*" + tempstr
if j != 19:
output = output + " + "
if j == 19:
output = output + ";"
j = j + 1
output = output + "\n"
i = i + 1
print(output)
FILE.writelines(output)
FILE.close
The print gives me correct output, but the FILE has last line missing and some of the second last line missing. What's the problem in writing strings into file?
Thank you!
Probably help if you called the method...
FILE.close()
The problem is that you aren't calling the close() method, just mentioning it in the last line. You need parens to invoke a function.
Python's with statement can make that unnecessary though:
with open(filename,"w") as the_file:
while i < 20:
j = 0
output = "square_sum = square_sum + "
...
print(output)
the_file.writelines(output)
When the with clause is exited, the_file will be closed automatically.
Try:
with open(filename,"w") as FILE:
while i < 20:
# rest of your code with proper indent...
no close needed...
First, a Pythonified version of your code:
img = 'image_{i}_{j}'
avg = 'average'
clause = '{img}*{img} + {avg}*{avg} - 2*{avg}*{img}'.format(img=img, avg=avg)
clauses = (clause.format(i=i, j=j) for i in xrange(20) for j in xrange(20))
joinstr = '\n + '
output = 'square_sum = {};'.format(joinstr.join(clauses))
fname = 'output.c'
with open(fname, 'w') as outf:
print output
outf.write(output)
Second, it looks like you are hoping to speed up your C code by fanatical inlining. I very much doubt the speed gains will justify your efforts over something like
maxi = 20;
maxj = 20;
sum = 0;
sqsum = 0;
for(i=0; i<maxi; i++)
for(j=0; j<maxj; j++) {
t = image[i][j];
sum += t;
sqsum += t*t;
}
square_sum = sqsum + maxi*maxj*average*average - 2*sum*average;
Looks like your indentation may be incorrect, but just some other comments about your code:
writelines() writes the content of a list or iterator to the file.
Since your outputting a single string, just use write().
lines ["lineone\n", "line two\n"]
f = open("myfile.txt", "w")
f.writelines(lines)
f.close()
Or just:
output = "big long string\nOf something important\n"
f = open("myfile.txt", "w")
f.write(output)
f.close()
As another side note it maybe helpful to use the += operator.
output += "more text"
# is equivalent to
output = output + "more text"

Why my code is getting NZEC run time error?

Question source: SPOJ.. ORDERS
def swap(ary,idx1,idx2):
tmp = ary[idx1]
ary[idx1] = ary[idx2]
ary[idx2] = tmp
def mkranks(size):
tmp = []
for i in range(1, size + 1):
tmp = tmp + [i]
return tmp
def permutations(ordered, movements):
size = len(ordered)
for i in range(1, size): # The leftmost one never moves
for j in range(0, int(movements[i])):
swap(ordered, i-j, i-j-1)
return ordered
numberofcases = input()
for i in range(0, numberofcases):
sizeofcase = input()
tmp = raw_input()
movements = ""
for i in range(0, len(tmp)):
if i % 2 != 1:
movements = movements + tmp[i]
ordered = mkranks(sizeofcase)
ordered = permutations(ordered, movements)
output = ""
for i in range(0, sizeofcase - 1):
output = output + str(ordered[i]) + " "
output = output + str(ordered[sizeofcase - 1])
print output
Having made your code a bit more Pythonic (but without altering its flow/algorithm):
def swap(ary, idx1, idx2):
ary[idx1], ary[idx2] = [ary[i] for i in (idx2, idx1)]
def permutations(ordered, movements):
size = len(ordered)
for i in range(1, len(ordered)):
for j in range(movements[i]):
swap(ordered, i-j, i-j-1)
return ordered
numberofcases = input()
for i in range(numberofcases):
sizeofcase = input()
movements = [int(s) for s in raw_input().split()]
ordered = [str(i) for i in range(1, sizeofcase+1)]
ordered = permutations(ordered, movements)
output = " ".join(ordered)
print output
I see it runs correctly in the sample case given at the SPOJ URL you indicate. What is your failing case?

Categories

Resources