write csv of nested for loop with if else - python

i am new to python and i learn a lot everyday . I have a specific folder that contains some xml file and i am parsing xml text of PMID ,Date ,Title and Abstract and i am writing csv of for loop with if else statement but it not printing it gives error it how to write csv of for loop with if else condition
Here is my python Code :
import os
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
import csv
path = '/home/shayez/Desktop/project/kk'
listfile = []
files = os.listdir(path)
for name in files:
listfile.append(name)
pmdata = []
for name2 in listfile:
full_file = os.path.abspath(os.path.join('project/kk',name2))
dom = ET.parse(full_file)
pmdat = dom.findall('PubmedArticle')
pmdata.append(pmdat)
def Print_Data ():
header = ['PMID','Date','Title','Abstract']
with open ('/home/shayez/Desktop/karim.csv','wt') as csvfile:
writer = csv.writer(csvfile, delimiter ="\t" )
writer.writerow(header)
for d in pmdata:
for c in d :
PMID = c.find('MedlineCitation/PMID').text
title = c.find('MedlineCitation/Article/ArticleTitle').text
Date = c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/Year')
Date2 = c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate')
Abstract = c.find('MedlineCitation/Article/Abstract/AbstractText')
rows = [PMID,Date,title,Abstract]
if Abstract is None :
print PMID,"\t",Date.text, "\t",title ,"\t", "No abstract Available"
elif Date2 is None:
print PMID,"\t",Date.text, "\t",title ,"\t",Abstract.text
elif Date is None:
print PMID,"\t",Date2.text, "\t",title ,"\t",Abstract.text
else :
print PMID,"\t","No Date", "\t",title ,"\t", "No abstract Available"
rows = [PMID,Date,title,Abstract]
writer.writerows(rows)
Print_Data()
Error :
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 699, in runfile
execfile(filename, namespace)
File "/usr/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 81, in execfile
builtins.execfile(filename, *where)
File "/home/shayez/Desktop/k.py", line 72, in <module>
Print_Data()
File "/home/shayez/Desktop/k.py", line 67, in Print_Data
writer.writerows(rows)
ValueError: I/O operation on closed file

You need your for loop to be inside the with block, otherwise it will close csvfile:
def Print_Data ():
header = ['PMID','Date','Title','Abstract']
with open ('/home/shayez/Desktop/karim.csv','wt') as csvfile:
writer = csv.writer(csvfile, delimiter ="\t" )
writer.writerow(header)
for d in pmdata:
for c in d :
PMID = c.find('MedlineCitation/PMID').text
title = c.find('MedlineCitation/Article/ArticleTitle').text
Date = c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/Year')
Date2 = c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate')
Abstract = c.find('MedlineCitation/Article/Abstract/AbstractText')
rows = [PMID,Date,title,Abstract]
# Other code
writer.writerows(rows)
Now that you've moved your writer inside your with block, we can address the other issue. writer.writerows() expects an iterable of row objects. Your rows object is a single row, throwing an exception. To accomplish what you want to do, you'll have to collect all of the row entries you want to write into a list:
with open ('/home/shayez/Desktop/karim.csv','wt') as csvfile:
writer = csv.writer(csvfile, delimiter ="\t" )
writer.writerow(header)
rows = []
for d in pmdata:
for c in d:
# code to get PMID, Date, title, Abstract
rows.append([PMID, Date, title, Abstract])
# Note that this is after your two for loops
writer.writerows(rows)
The other option you have is to use writer.writerow(row) inside the for loop on each row

Related

Search for list values in a csv file python

There is a csv file, there is a unique id column and a column with a date. The task of this section of the code is that you need to scan the lines using the id key, find the id in the line, write the line to a new file called id. Faced with the problem that the interpreter returns an error by id. Although everything is logical and correct. Where did I go wrong?
id, Numder, Date
123456, 89654535556, 25.11.2021 15:35:00
321654, 96554412255, 23.11.2021 18:50:00
524163, 38095224444, 18.11.2021 13:30:00
from csv import DictReader
from csv import DictWriter
from os.path import isfile
def export_csv(user_id, master_csv, fieldnames, key_id, extension=".csv"):
filename = user_id + extension
file_exists = isfile(filename)
with open(file=master_csv) as in_file, open(
file=filename, mode="a", newline=""
) as out_file:
# Create reading and writing objects
csv_reader = DictReader(in_file)
csv_writer = DictWriter(out_file, fieldnames=fieldnames)
# Only write header once
if not file_exists:
csv_writer.writeheader()
# Go through lines and match ids
for line in csv_reader:
if line[key_id] == user_id:
# Modify line and append to file
line = {k: v.strip() for k, v in line.items() if k in fieldnames}
csv_writer.writerow(line)
export_csv(
user_id="512863",
master_csv="master.csv",
fieldnames=["Number", "Date"],
key_id="id")
Traceback (most recent call last):
File "C:/Users/sedei/PycharmProjects/convector/1.py", line 58, in
export_csv(
File "C:/Users/sedei/PycharmProjects/convector/1.py", line 52, in export_csv
if line[key_id] == user_id:
KeyError: 'id'

Reading from a text file, parsing it, then converting it to a csv

I have this text file, that contains user information. I want to parse the data, so I only have the username, and then I want to create a csv file with that parsed data.
This is the text file, my script is reading from.
blah.com\user1:dajlfnadjhlasdjasnasjlfn:test1
blah.com\user2:dajlfnadjhlasdjasnasjlfn:test2
blah.com\user3:dajlfnadjhlasdjasnasjlfn:test3
blah.com\user4:dajlfnadjhlasdjasnasjlfn:test4
blah.com\user5:dajlfnadjhlasdjasnasjlfn:test5
blah.com\user6:dajlfnadjhlasdjasnasjlfn:test6
Here is my script
import time, os, os.path, sys, string, datetime, time, shutil, csv
#Locate the file
globalpath = 'C:\\users\\userinfo\\'
todaysdatefull = datetime.datetime.now()
todaysdate = todaysdatefull.strftime("%Y-%m-%d")
datapath = globalpath + 'data\\' + todaysdate + "\\"
logfile = datapath + 'userinfo.txt'
potfile = datapath + 'parsed.csv'
infile = logfile
outfile = potfile
lines = []
# Open the file, find the username and parses it
with open(infile, 'r') as f:
for line in f:
usernamestart = line.find('\\')
usernameend = line.find(':')
username = line[usernamestart+1:usernameend]
lines.append(username)
print(username)
# Outputs the data as a csv file
with open(outfile, 'w') as csv:
writer = csv.writer(csv)
for i in range(len(lines)):
writer.writerow(('Username', 'Date'))
writer.writerow(lines[i])
Result:
Traceback (most recent call last):
File "C:\Automation\autocrack\highrisk_parser.py", line 33, in <module>
writer = csv.writer(csv)
AttributeError: 'file' object has no attribute 'writer'
It is coming from this line
with open(outfile, 'w') as csv:, your are overwriting the csv import. You should rename the file where you write like this
with open(outfile, 'w') as csv_to_write:
writer = csv.writer(csv_to_write)
# Write the header once.
writer.writerow(tuple(['Username', 'Date']))
for one_line in lines:
# you have to give the function a tuple, if not, the writerow iterates on each element of the string for writing it in a new line.
writer.writerow(tuple([one_line, '']))
Your first part of code finding the username can be done as following:
with open(infile, 'r') as f:
lines = [line.split('\\')[-1].split(':')[0] for line in f]

convert the following json to csv using python

{"a":"1","b":"1","c":"1"}
{"a":"2","b":"2","c":"2"}
{"a":"3","b":"3","c":"3"}
{"a":"4","b":"4","c":"4"}
I have tried the following code but it gives error:-
from nltk.twitter import Twitter
from nltk.twitter.util import json2csv
with open('C:/Users/Archit/Desktop/raw_tweets.json', 'r') as infile:
# Variable for building our JSON block
json_block = []
for line in infile:
# Add the line to our JSON block
json_block.append(line)
# Check whether we closed our JSON block
if line.startswith('{'):
# Do something with the JSON dictionary
json2csv(json_block, 'tweets.csv', ['id','text','created_at','in_reply_to_user_id','in_reply_to_screen_name','in_reply_to_status_id','user.id','user.screen_name','user.name','user.location','user.friends_count','user.followers_count','source'])
# Start a new block
json_block = []
Error:
File "C:\Python34\lib\json\decoder.py", line 361, in raw_decode
raise ValueError(errmsg("Expecting value", s, err.value)) from None
ValueError: Expecting value: line 1 column 1 (char 0)
import csv, json
data = []
with open('C:\Users\Shahriar\Desktop\T.txt') as data_file:
for line in data_file:
data.append(json.loads(line))
keys = data[0].keys()
with open('data.csv', 'wb') as csvF:
csvWriter = csv.DictWriter(csvF, fieldnames=keys)
csvWriter.writeheader()
for d in data:
csvWriter.writerow(d)
Output:
a,c,b
1,1,1
2,2,2
3,3,3
4,4,4
This is way too late but I also stumbled upon some errors today. I figured that you actually have to import from nltk.twitter.common instead of util. Hope this helps others who stumbled upon this thread
# Read json
filename = 'C:/Users/Archit/Desktop/raw_tweets.json'
lines = [line.replace("{", "").replace("}", "").replace(":", ",") for line in open(filename)]
# Write csv
with open('out.csv', 'w') as csv_file:
for line in lines:
csv_file.write("%s\n" % line)

Python Openpyxl Append issue

I have hundreds of XML files that I need to extract two values from and ouput in an Excel or CSV file. This is the code I currently have:
#grabs idRoot and typeId root values from XML files
import glob
from openpyxl import Workbook
from xml.dom import minidom
import os
wb = Workbook()
ws = wb.active
def typeIdRoot (filename):
f = open(filename, encoding = "utf8")
for xml in f:
xmldoc = minidom.parse(f)
qmd = xmldoc.getElementsByTagName("MainTag")[0]
typeIdElement = qmd.getElementsByTagName("typeId")[0]
root = typeIdElement.attributes["root"]
global rootValue
rootValue = root.value
print ('rootValue =' ,rootValue,)
ws.append([rootValue])
wb.save("some.xlsx")
wb = Workbook()
ws = wb.active
def idRoot (filename):
f = open(filename, encoding = "utf8")
for xml in f:
xmldoc = minidom.parse(f)
tcd = xmldoc.getElementsByTagName("MainTag")[0]
activitiesElement = tcd.getElementsByTagName("id")[0]
sport = activitiesElement.attributes["root"]
sportName = sport.value
print ('idRoot =' ,sportName,)
ws.append([idRoot])
wb.save("some.xlsx")
for file in glob.glob("*.xml"):
typeIdRoot (file)
for file in glob.glob("*.xml"):
idRoot (file)
The first value follows a 1.11.111.1.111111.1.3 format. The second mixes letters and numbers. I believe this is the reason for the error:
Traceback (most recent call last):
File "C:\Python34\Scripts\xml\good.py", line 64, in <module>
idRoot (file)
File "C:\Python34\Scripts\xml\good.py", line 54, in idRoot
ws.append([idRoot])
File "C:\Python34\lib\site-packages\openpyxl\worksheet\worksheet.py", line 754, in append
cell = self._new_cell(col, row_idx, content)
File "C:\Python34\lib\site-packages\openpyxl\worksheet\worksheet.py", line 376, in _new_cell
cell = Cell(self, column, row, value)
File "C:\Python34\lib\site-packages\openpyxl\cell\cell.py", line 131, in __init__
self.value = value
File "C:\Python34\lib\site-packages\openpyxl\cell\cell.py", line 313, in value
self._bind_value(value)
File "C:\Python34\lib\site-packages\openpyxl\cell\cell.py", line 217, in _bind_value
raise ValueError("Cannot convert {0} to Excel".format(value))
ValueError: Cannot convert <function idRoot at 0x037D24F8> to Excel
I would like the result to add both values on the same row. So then I would have a new row for each file in the directory. I need to add the second value to the second row.
as such:
Value 1 Value 2
1.11.111.1.111111.1.3 10101011-0d10-0101-010d-0dc1010e0101
idRoot is the name of your FUNCTION.
So when you write
ws.append([idRoot])
you probably mean:
ws.append([sportName])
Of course, you can write something like:
ws.append([rootValue, sportName])
providing both variables are defined with reasonable values.
One last thing, you should save your file only once.

IndexError: list index out of range csv reader python

I have the following csv called report.csv. It's an excel file:
email agent_id misc
test#email.com 65483843154f35d54 blah1
test1#email.com sldd989eu99ufj9ej9e blah 2
I have the following code:
import csv
data_file = 'report.csv'
def import_data(data_file):
attendee_data = csv.reader(open(data_file, 'rU'), dialect=csv.excel_tab)
for row in attendee_data:
email = row[1]
agent_id = row[2]
pdf_file_name = agent_id + '_' + email + '.pdf'
generate_certificate(email, agent_id, pdf_file_name)
I get the following error:
Traceback (most recent call last):
File "report_test.py", line 56, in <module>
import_data(data_file)
File "report_test.py", line 25, in import_data
email = row[1]
IndexError: list index out of range
I thought the index was the number of columns in, within each row. row[1] and 'row[2]` should be within range, no?
There is most likely a blank line in your CSV file. Also, list indices start at 0, not 1.
import csv
data_file = 'report.csv'
def import_data(data_file):
attendee_data = csv.reader(open(data_file, 'rU'), dialect=csv.excel_tab)
for row in attendee_data:
try:
email = row[0]
agent_id = row[1]
except IndexError:
pass
else:
pdf_file_name = agent_id + '_' + email + '.pdf'
generate_certificate(email, agent_id, pdf_file_name)
You say you have an "Excel CSV", which I don't quite understand so I'll answer assuming you have an actual .csv file.
If I'm loading a .csv into memory (and the file isn't enormous), I'll often have a load_file method on my class that doesn't care about indexes.
Assuming the file has a header row:
import csv
def load_file(filename):
# Define data in case the file is empty.
data = []
with open(filename) as csvfile:
reader = csv.reader(csvfile)
headers = next(reader)
data = [dict(zip(headers, row)) for row in reader]
return data
This returns a list of dictionaries you can use by key, instead of index. The key will be absent in the event, say misc is missing from the row (index 2), so simply .get from the row. This is cleaner than a try...except.
for row in data:
email = row.get('email')
agent_id = row.get('agent_id')
misc = row.get('misc')
This way the order of the file columns don't matter, only the headers do. Also, if any of the columns have a blank value, your script won't error out by giving an IndexError. If you don't want to include blank values, simply handle them by checking:
if not email:
do.something()
if not agent_id:
do.something_else()

Categories

Resources