I'm having trouble storing data, minus the header, into a new file, and I don't understand Python well enough to debug it.
Ultimately, I'd like to extract the data from each file and store it in one main csv file, rather than opening every file individually and copying and pasting its contents into the main csv file by hand.
My code is as follows:
import csv, os

# os.makedirs() creates a folder with the quoted name (if it doesn't already exist).
os.makedirs('HeaderRemoved', exist_ok=True)

# Loop through every file in the current working directory.
for csvFilename in os.listdir('directory'):
    if not csvFilename.endswith('.csv'):
        continue  # skip non-csv files
    print('Removing header from ' + csvFilename + '...')

    ### Read the CSV file in (skipping the first row) ###
    csvRows = []
    csvFileObj = open(csvFilename)
    readerObj = csv.reader(csvFileObj)
    for row in readerObj:
        if readerObj.line_num == 1:
            continue  # skip first row
        csvRows.append(row)
    print(csvRows)  # -----------> check whether anything was stored in the list
    csvFileObj.close()

    # TODO: Write out the CSV file.
    csvFileObj = open(os.path.join('HeaderRemoved', 'directory/mainfile.csv'), 'w',
                      newline='')
    csvWriter = csv.writer(csvFileObj)
    for row in csvRows:
        csvWriter.writerow(row)
    csvFileObj.close()
The csv files being read contain both text and numbers. I don't know whether that might be preventing the script from properly reading and storing the data in the csvRows list.
The problem comes from reusing the same variable when you loop over your file names. See the documentation for os.listdir: it returns a list of filenames. Your newfile then no longer points at the file object, but at a string filename from the directory.
https://docs.python.org/3/library/os.html#os.listdir
with open(scancsvFile, 'w') as newfile:
    array = []
    # for row in scancsvFile
    for newfile in os.listdir('directory'):  # <---- you're reassigning the variable newfile here
        if newfile.line_num == 1:
            continue
        array.append(lines)
    newfile.close()
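To fix it, use distinct names for the output file handle and the loop variable. Here's a minimal sketch of what the combined loop could look like, assuming the goal from the question (one combined mainfile.csv with the header removed from each input); the 'HeaderRemoved' folder name comes from the question, and reading the inputs from the current directory is an assumption:

import csv, os

os.makedirs('HeaderRemoved', exist_ok=True)

# Open the combined output file once, outside the loop, so rows from every
# input file are appended rather than overwritten.
with open(os.path.join('HeaderRemoved', 'mainfile.csv'), 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    for csvFilename in os.listdir('.'):  # assumption: inputs live in the current directory
        if not csvFilename.endswith('.csv'):
            continue  # skip non-csv files
        with open(csvFilename, newline='') as infile:
            reader = csv.reader(infile)
            next(reader, None)  # skip the header row
            for row in reader:
                writer.writerow(row)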
I want to open a variable number of csv files and then iterate over them, uploading one row of each file at a time to my SQL database.
For example, loop through each file uploading the first row of each to the database, then loop again uploading the second row of each.
However, I'm stuck on getting the opened csv files into a single object.
The error happens at 'csv_data[i] = csv.reader...'
Each file is for a different table, so I cannot append them.
import csv
import sys

i = 0
for argv in sys.argv[1:]:
    csv_file = open(argv, newline='', encoding='utf-8-sig')
    csv_data[i] = csv.reader(csv_file, dialect='excel', delimiter=',', quotechar='|')
    csv_file.close()
    i += 1
After this code, I would need something to loop through each file uploading a certain row number.
Zip the readers together, then iterate through them:
import csv
import sys

file_handles = [open(name, newline='', encoding='utf-8-sig') for name in sys.argv[1:]]
readers = (csv.reader(f, dialect='excel', delimiter=',', quotechar='|') for f in file_handles)

# zip here
for line_group in zip(*readers):
    # line_group is a tuple holding row i of each file
    pass  # process line_group here

# don't forget to close your files
for file_handle in file_handles:
    try:
        file_handle.close()
    except OSError:
        print("Issue closing one of the files")
I've written a script in Python which fetches the titles of different posts from a webpage and writes them to a csv file. As the site updates its content very frequently, I'd like to prepend the new results to the csv file, ahead of the old titles already stored there.
I've tried with:
import csv
import time
import requests
from bs4 import BeautifulSoup

url = "https://stackoverflow.com/questions/tagged/python"

def get_information(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'lxml')
    for title in soup.select(".summary .question-hyperlink"):
        yield title.text

if __name__ == '__main__':
    while True:
        with open("output.csv", "a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(['posts'])
            for items in get_information(url):
                writer.writerow([items])
                print(items)
        time.sleep(300)
When the above script is run twice, it appends the new results after the old ones.
Old data are like:
A
F
G
T
New data are W,Q,U.
The csv file should look like below when I rerun the script:
W
Q
U
A
F
G
T
How can I prepend the new results to an existing csv file that already contains old data?
Inserting data anywhere in a file except at the end requires rewriting the whole thing. To do this without reading its entire contents into memory first, you could create a temporary csv file with the new data in it, append the data from the existing file to that, delete the old file and rename the new one.
Here's an example of what I mean (using a dummy get_information() function to simplify testing).
import csv
import os
from tempfile import NamedTemporaryFile

url = 'https://stackoverflow.com/questions/tagged/python'
csv_filepath = 'updated.csv'

# For testing, create an existing file.
if not os.path.exists(csv_filepath):
    with open(csv_filepath, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows([item] for item in 'AFGT')

# Dummy for testing.
def get_information(url):
    for item in 'WQU':
        yield item

if __name__ == '__main__':
    folder = os.path.abspath(os.path.dirname(csv_filepath))  # Get dir of existing file.
    with NamedTemporaryFile(mode='w', newline='', suffix='.csv',
                            dir=folder, delete=False) as newf:
        temp_filename = newf.name  # Save filename.
        # Put the new data into the temporary file.
        writer = csv.writer(newf)
        for item in get_information(url):
            writer.writerow([item])
            print([item])
        # Append the contents of the existing file to the new one.
        with open(csv_filepath, 'r', newline='') as oldf:
            reader = csv.reader(oldf)
            for row in reader:
                writer.writerow(row)
                print(row)

    os.remove(csv_filepath)  # Delete old file.
    os.rename(temp_filename, csv_filepath)  # Rename temporary file.
Since you intend to change the position of every element of the table, you need to read the table into memory and rewrite the entire file, starting with the new elements.
You may find it easier to (1) write the new element to a new file, (2) open the old file and append its contents to the new file, and (3) move the new file to the original (old) file name.
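A minimal sketch of those three steps, assuming the file is named output.csv as in the question and the new rows have already been collected:

import csv
import os

new_rows = [['W'], ['Q'], ['U']]  # example values from the question

# (1) Write the new rows to a temporary file.
with open('output.csv.tmp', 'w', newline='') as newf:
    writer = csv.writer(newf)
    writer.writerows(new_rows)
    # (2) Append the old file's contents after them.
    with open('output.csv', newline='') as oldf:
        for row in csv.reader(oldf):
            writer.writerow(row)

# (3) Move the temporary file over the original name.
os.replace('output.csv.tmp', 'output.csv')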
I need to use a list of file names from a CSV file to find and copy the respective files. Below is the code. I'm not getting any errors, but it isn't yielding any results (I've checked and rechecked the sample list I created against the actual files). Any idea where I messed up? I appreciate any and all help. (I'm using the latest version of Python.)
import os, shutil, csv

files_to_find = []
with open('C:\\pdfsearch.csv') as fh:
    reader = csv.reader(fh)
    files_to_find = list(reader)

for root, dirs, files in os.walk('C:\\mail'):
    for _file in files:
        if _file in files_to_find:
            print('Found file in: ' + str(root))
            shutil.copy(os.path.abspath(root + '/' + _file), 'C:\\Matches')
The problem is that csv.reader returns rows, so you end up with a list of lists. Instead of the output you expect, ['1003055716CBR201510.pdf', '1003080516CBR201510.pdf'], you get [['1003055716CBR201510.pdf'], ['1003080516CBR201510.pdf']].
Just don't use csv.reader:
_files_to_find = set(open("pdfsearch.csv").read().splitlines(False))
Or, alternatively, take the first element of each row:

_files2find = []
with open('C:\\pdfsearch.csv') as fh:
    reader = csv.reader(fh)
    for row in reader:
        _files2find.append(row[0])
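Putting that together, a short sketch of the corrected search, reusing the paths from the question:

import os, shutil, csv

# Build a set of bare filenames (first column of each row) for fast lookup.
files_to_find = set()
with open('C:\\pdfsearch.csv') as fh:
    for row in csv.reader(fh):
        files_to_find.add(row[0])

for root, dirs, files in os.walk('C:\\mail'):
    for _file in files:
        if _file in files_to_find:
            print('Found file in: ' + root)
            shutil.copy(os.path.join(root, _file), 'C:\\Matches')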
I am using Blair's Python script, which modifies a CSV file to add the filename as the last column (script appended below). However, instead of adding the file name alone, I also get the path and file name in the last column.
I run the script on Windows 7 from cmd with the following command:
python C:\data\set1\subseta\add_filename.py C:\data\set1\subseta\20100815.csv
The resulting ID field is populated with C:\data\set1\subseta\20100815.csv, though all I need is 20100815.csv.
I'm new to Python, so any suggestion is appreciated!
import csv
import sys

def process_file(filename):
    # Read the contents of the file into a list of lines.
    f = open(filename, 'r')
    contents = f.readlines()
    f.close()
    # Use a CSV reader to parse the contents.
    reader = csv.reader(contents)
    # Open the output and create a CSV writer for it.
    f = open(filename, 'wb')
    writer = csv.writer(f)
    # Process the header.
    header = reader.next()
    header.append('ID')
    writer.writerow(header)
    # Process each row of the body.
    for row in reader:
        row.append(filename)
        writer.writerow(row)
    # Close the file and we're done.
    f.close()

# Run the function on all command-line arguments. Note that this does no
# checking for things such as file existence or permissions.
map(process_file, sys.argv[1:])
Use os.path.basename(filename). See http://docs.python.org/library/os.path.html for more details.
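In the script above, that change would go where each row is built; a sketch, keeping the script's Python 2 style:

import os.path

# Inside process_file(), append only the base name instead of the full path:
for row in reader:
    row.append(os.path.basename(filename))
    writer.writerow(row)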
Hi, I have a csv file with names and surnames and empty username and password columns.
How can I use Python's csv module to write to columns 3 and 4 in each row, just appending to the rows, not overwriting anything?
The csv module doesn't do that; you'd have to write the rows out to a separate file and then replace the old file with the new one, or read the whole file into memory and then write over it.
I'd recommend the first option:
from csv import writer as csvwriter, reader as csvreader
from os import rename  # add ', remove' on Windows

with open(infilename) as infile:
    csvr = csvreader(infile)
    with open(outfilename, 'wb') as outfile:
        csvw = csvwriter(outfile)
        for row in csvr:
            # do whatever to get the username / password
            # for this row here
            row.append(username)
            row.append(password)
            csvw.writerow(row)
            # or 'csvw.writerow(row + [username, password])' if you want one line

# only on Windows
# remove(infilename)
rename(outfilename, infilename)
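Note the snippet above targets Python 2 (hence the 'wb' mode). Purely as a sketch, a Python 3 version of the same approach might look like this, still assuming infilename, outfilename, username, and password are defined elsewhere as in the original:

import csv
import os

# Python 3: open text files with newline='' for the csv module.
with open(infilename, newline='') as infile, \
     open(outfilename, 'w', newline='') as outfile:
    csvr = csv.reader(infile)
    csvw = csv.writer(outfile)
    for row in csvr:
        # derive username / password for this row here
        csvw.writerow(row + [username, password])

# os.replace overwrites an existing file, on Windows too,
# unlike os.rename onto an existing target.
os.replace(outfilename, infilename)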