Search for list values in a csv file python - python

There is a csv file, there is a unique id column and a column with a date. The task of this section of the code is that you need to scan the lines using the id key, find the id in the line, write the line to a new file called id. Faced with the problem that the interpreter returns an error by id. Although everything is logical and correct. Where did I go wrong?
id, Numder, Date
123456, 89654535556, 25.11.2021 15:35:00
321654, 96554412255, 23.11.2021 18:50:00
524163, 38095224444, 18.11.2021 13:30:00
from csv import DictReader
from csv import DictWriter
from os.path import isfile
def export_csv(user_id, master_csv, fieldnames, key_id, extension=".csv"):
filename = user_id + extension
file_exists = isfile(filename)
with open(file=master_csv) as in_file, open(
file=filename, mode="a", newline=""
) as out_file:
# Create reading and writing objects
csv_reader = DictReader(in_file)
csv_writer = DictWriter(out_file, fieldnames=fieldnames)
# Only write header once
if not file_exists:
csv_writer.writeheader()
# Go through lines and match ids
for line in csv_reader:
if line[key_id] == user_id:
# Modify line and append to file
line = {k: v.strip() for k, v in line.items() if k in fieldnames}
csv_writer.writerow(line)
export_csv(
user_id="512863",
master_csv="master.csv",
fieldnames=["Number", "Date"],
key_id="id")
Traceback (most recent call last):
File "C:/Users/sedei/PycharmProjects/convector/1.py", line 58, in
export_csv(
File "C:/Users/sedei/PycharmProjects/convector/1.py", line 52, in export_csv
if line[key_id] == user_id:
KeyError: 'id'

Related

Reading from a text file, parsing it, then converting it to a csv

I have this text file, that contains user information. I want to parse the data, so I only have the username, and then I want to create a csv file with that parsed data.
This is the text file, my script is reading from.
blah.com\user1:dajlfnadjhlasdjasnasjlfn:test1
blah.com\user2:dajlfnadjhlasdjasnasjlfn:test2
blah.com\user3:dajlfnadjhlasdjasnasjlfn:test3
blah.com\user4:dajlfnadjhlasdjasnasjlfn:test4
blah.com\user5:dajlfnadjhlasdjasnasjlfn:test5
blah.com\user6:dajlfnadjhlasdjasnasjlfn:test6
Here is my script
import time, os, os.path, sys, string, datetime, time, shutil, csv
#Locate the file
globalpath = 'C:\\users\\userinfo\\'
todaysdatefull = datetime.datetime.now()
todaysdate = todaysdatefull.strftime("%Y-%m-%d")
datapath = globalpath + 'data\\' + todaysdate + "\\"
logfile = datapath + 'userinfo.txt'
potfile = datapath + 'parsed.csv'
infile = logfile
outfile = potfile
lines = []
# Open the file, find the username and parses it
with open(infile, 'r') as f:
for line in f:
usernamestart = line.find('\\')
usernameend = line.find(':')
username = line[usernamestart+1:usernameend]
lines.append(username)
print(username)
# Outputs the data as a csv file
with open(outfile, 'w') as csv:
writer = csv.writer(csv)
for i in range(len(lines)):
writer.writerow(('Username', 'Date'))
writer.writerow(lines[i])
Result:
Traceback (most recent call last):
File "C:\Automation\autocrack\highrisk_parser.py", line 33, in <module>
writer = csv.writer(csv)
AttributeError: 'file' object has no attribute 'writer'
It is coming from this line
with open(outfile, 'w') as csv:, your are overwriting the csv import. You should rename the file where you write like this
with open(outfile, 'w') as csv_to_write:
writer = csv.writer(csv_to_write)
# Write the header once.
writer.writerow(tuple(['Username', 'Date']))
for one_line in lines:
# you have to give the function a tuple, if not, the writerow iterates on each element of the string for writing it in a new line.
writer.writerow(tuple([one_line, '']))
Your first part of code finding the username can be done as following:
with open(infile, 'r') as f:
lines = [line.split('\\')[-1].split(':')[0] for line in f]

I need my csv out put to use commas instead of semicolons so I can insert it into my sqlite DB

Right now my nmap csv is putting semicolons in the file which I need to change to commas.
nmap scan
import first
import csv
import nmap
csvFilePath = "nmapscan1.csv"
ipAddress = first.ipAddress
port = first.port
#nmap scan using user input varibles
nm = nmap.PortScanner()
nm.scan(ipAddress,port)
csv = nm.csv()
print(csv)
#writing to csv file
with open(csvFilePath, "w") as csvFile:
csvFile.write(csv)
#changes ; to , for database use
with open(r"nmapscan1.csv") as in_file, open(r"nmapscan.csv", 'w') as
out_file:
semicolonin = csv.reader(in_file, delimiter=';')
commaout = csv.writer(out_file, delimiter=',')
for row in semicolonin:
commaout.writerow(row)
error I get in Ubuntu terminal
Traceback (most recent call last):
File "second.py", line 23, in <module>
semicolonin = csv.reader(in_file, delimiter=';')
AttributeError: '_io.TextIOWrapper' object has no attribute 'reader'
Set a different name to csv on csv = nm.csv(). It is overwriting your csv on import csv.
Edited*
This part:
csv_data = nm.csv()
print(csv_data)
#writing to csv file
with open(csvFilePath, "w") as csvFile:
csvFile.write(csv_data)

write csv of nested for loop with if else

i am new to python and i learn a lot everyday . I have a specific folder that contains some xml file and i am parsing xml text of PMID ,Date ,Title and Abstract and i am writing csv of for loop with if else statement but it not printing it gives error it how to write csv of for loop with if else condition
Here is my python Code :
import os
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
import csv
path = '/home/shayez/Desktop/project/kk'
listfile = []
files = os.listdir(path)
for name in files:
listfile.append(name)
pmdata = []
for name2 in listfile:
full_file = os.path.abspath(os.path.join('project/kk',name2))
dom = ET.parse(full_file)
pmdat = dom.findall('PubmedArticle')
pmdata.append(pmdat)
def Print_Data ():
header = ['PMID','Date','Title','Abstract']
with open ('/home/shayez/Desktop/karim.csv','wt') as csvfile:
writer = csv.writer(csvfile, delimiter ="\t" )
writer.writerow(header)
for d in pmdata:
for c in d :
PMID = c.find('MedlineCitation/PMID').text
title = c.find('MedlineCitation/Article/ArticleTitle').text
Date = c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/Year')
Date2 = c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate')
Abstract = c.find('MedlineCitation/Article/Abstract/AbstractText')
rows = [PMID,Date,title,Abstract]
if Abstract is None :
print PMID,"\t",Date.text, "\t",title ,"\t", "No abstract Available"
elif Date2 is None:
print PMID,"\t",Date.text, "\t",title ,"\t",Abstract.text
elif Date is None:
print PMID,"\t",Date2.text, "\t",title ,"\t",Abstract.text
else :
print PMID,"\t","No Date", "\t",title ,"\t", "No abstract Available"
rows = [PMID,Date,title,Abstract]
writer.writerows(rows)
Print_Data()
Error :
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 699, in runfile
execfile(filename, namespace)
File "/usr/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 81, in execfile
builtins.execfile(filename, *where)
File "/home/shayez/Desktop/k.py", line 72, in <module>
Print_Data()
File "/home/shayez/Desktop/k.py", line 67, in Print_Data
writer.writerows(rows)
ValueError: I/O operation on closed file
You need your for loop to be inside the with block, otherwise it will close csvfile:
def Print_Data ():
header = ['PMID','Date','Title','Abstract']
with open ('/home/shayez/Desktop/karim.csv','wt') as csvfile:
writer = csv.writer(csvfile, delimiter ="\t" )
writer.writerow(header)
for d in pmdata:
for c in d :
PMID = c.find('MedlineCitation/PMID').text
title = c.find('MedlineCitation/Article/ArticleTitle').text
Date = c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/Year')
Date2 = c.find('MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate')
Abstract = c.find('MedlineCitation/Article/Abstract/AbstractText')
rows = [PMID,Date,title,Abstract]
# Other code
writer.writerows(rows)
Now that you've moved your writer inside your with block, we can address the other issue. writer.writerows() expects an iterable of row objects. Your rows object is a single row, throwing an exception. To accomplish what you want to do, you'll have to collect all of the row entries you want to write into a list:
with open ('/home/shayez/Desktop/karim.csv','wt') as csvfile:
writer = csv.writer(csvfile, delimiter ="\t" )
writer.writerow(header)
rows = []
for d in pmdata:
for c in d:
# code to get PMID, Date, title, Abstract
rows.append([PMID, Date, title, Abstract])
# Note that this is after your two for loops
writer.writerows(rows)
The other option you have is to use writer.writerow(row) inside the for loop on each row

how to make a copy from csv file to edit it

when i'm trying to make a copy from csv file to edit it away of the original
then I apply the effects to the original
import csv
import shutil
from tempfile import NamedTemporaryFile
filename = "data1.csv"
temp_file = NamedTemporaryFile(delete=False)
print(temp_file.name)
with open(filename, "r",encoding='utf8') as csvfile, temp_file:
reader = csv.DictReader(csvfile)
fieldnames = ["id", "name", "email", "sent"]
writer = csv.DictWriter(temp_file, fieldnames=fieldnames)
# writer.writeheader()
for row in reader:
writer.writerow({
"id":row["id"],
"name":row["name"],
"email":row["email"],
"sent":""
})
I get this error :/
C:\Users\Arafat\AppData\Local\Temp\tmpwgkcslas
Traceback (most recent call last):
File "C:\Users\Arafat\Desktop\30dpython\hungry_data.py", line 49, in <module>
"sent":""
File "C:\Users\Arafat\AppData\Local\Programs\Python\Python36-32\lib\csv.py", line 155, in writerow
return self.writer.writerow(self._dict_to_list(rowdict))
File "C:\Users\Arafat\AppData\Local\Programs\Python\Python36-32\lib\tempfile.py", line 483, in func_wrapper
return func(*args, **kwargs)
TypeError: a bytes-like object is required, not 'str'
The error is the result of your temp_file being opened in binary mode rather than text mode (the default is w+b). Change it to:
temp_file = NamedTemporaryFile(mode='w', encoding='utf8', delete=False)
(the encoding is not strictly necessary, but since you're specifying it on the input, makes sense to specify it on the output).
See https://docs.python.org/3/library/tempfile.html

Python read CSV file columns and write file name and column name in a csv file

I have many CSV files, need to read all the files in loop and write file name and all the columns (header in row 1) in an output file.
Example
Input csv file 1 (test1.csv)
Id, Name, Age, Location
1, A, 25, India
Input csv file 2 (test2.csv)
Id, ProductName
1, ABC
Outputfile
test1.csv Id
test1.csv Name
test1.csv Age
test1.csv Location
test2.csv Id
test2.csv ProductName
Many thanks for your help.
Update:
This code works fine for this purpose:
import os
import csv
ofile = open('D:\Anuj\Personal\OutputFile/AHS_File_Columns_Info.csv', 'w')
directory = os.path.join('D:\Anuj\Personal\Python')
for root, dirs, files in os.walk(directory):
for file in files:
fullfilepath = directory + "/" + file
with open(fullfilepath,'r') as f:
output = file +','+ f.readline()
ofile.write(output)
clean solution using csv module for reading and writing
open output file and create a csv.writer instance on its handle
open each input file and create a csv.reader instance on their handle
get first row using next on the csv.reader iterator: gets titles as list (with a small post-processing to remove the spaces)
write titles alongside the current filename in a loop
code:
import csv
files=["test1.csv","test2.csv"]
with open("output.tsv","w",newline='') as fw:
cw = csv.writer(fw,delimiter="\t") # output is tab delimited
for filename in files:
with open(filename,'r') as f:
cr = csv.reader(f)
# get title
for column_name in (x.strip() for x in next(cr)):
cw.writerow([filename,column_name])
There are several advantages using csv module, the most important being that quoting & multi-line fields/titles are managed properly.
But I'm not sure I understand you correctly.
import csv
from typing import List
from typing import Tuple
TableType = List[List[str]]
def load_csv_table(file_name: str) -> Tuple[List[str], TableType]:
with open(file_name) as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
headers = next(csv_reader)
data_table = list(csv_reader)
return headers, data_table
def save_csv_table(file_name: str, headers: List[str], data_table: TableType):
with open(file_name, 'w', newline='') as csv_file:
writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
writer.writerow(headers)
for row in data_table:
writer.writerow(row)
input_files = ['file1.csv', 'file2.csv', 'file3.csv']
new_table = []
new_headers = []
for file_name in input_files:
headers, data_table = load_csv_table(file_name)
if not new_headers:
new_headers = ['Source'] + headers
new_table.extend(([file_name] + line for line in data_table))
save_csv_table('output.csv', new_headers, new_table)
A simple method is to use readline() on the file object:
files=["test1.csv","test2.csv"]
for my_file in files:
with open(my_file,'r') as f:
print my_file, f.readline()

Categories

Resources