csv header in python only on the top row? - python

i have written a python program which makes an api call to a webserver once every minute and then parse the json response and saves parsed values in to the csv files.
here is the code that is saving the values into the csv file :
with open('data.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([current_time,SHORTPERC, LONGPERC, SHORTvolume, longVolume, longPositions, shortPositions])
how can i make it so that it saves the header only once on the top most row and not on every row ?
UPDATE:
here is a bit of more code to make api call and write the data to file :
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger
import requests
import json
import csv
from datetime import datetime
def fn():
print("Starting...")
session_id = "auZsJ4F2RsQNJxSPTMDt2238324"
Outlook='http://www.myfxbook.com/api/get-community-outlook.json?session=' + session_id
Outlook_response = requests.get(Outlook)
Outlook_data = Outlook_response.json()['symbols']
now = datetime.now()
current_time = now.strftime("%H:%M")
EURUSD=Outlook_data[0]
SHORTPERC=EURUSD['shortPercentage']
LONGPERC =EURUSD['longPercentage']
SHORTvolume=EURUSD['shortVolume']
longVolume=EURUSD['longVolume']
longPositions=EURUSD['longPositions']
shortPositions=EURUSD['shortPositions']
with open('data.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([current_time,SHORTPERC, LONGPERC, SHORTvolume, longVolume, longPositions, shortPositions])
with open('data1.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([SHORTvolume, longVolume])
with open('data2.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([SHORTPERC, LONGPERC])
i cant post the full code cuz it will be very ugly since its around 700 lines long , but the above mentioned code should work to create the csv file
this is how one of my csv files look :
07:11,31,69,555.55,1265.14,4750,2607
07:12,31,69,555.55,1265.16,4751,2607
07:13,31,69,555.55,1265.16,4751,2607
07:14,30,70,555.56,1267.36,4752,2608
07:15,30,70,555.56,1267.36,4752,2608
07:16,30,70,555.56,1267.36,4752,2608
07:17,30,70,555.46,1267.36,4752,2607
07:18,31,69,558.61,1267.36,4752,2610
07:19,31,69,558.61,1267.37,4753,2610
07:20,31,69,561.58,1267.37,4753,2611
07:21,31,69,561.61,1267.37,4753,2613
07:22,31,69,561.65,1267.37,4753,2614
07:23,31,69,561.65,1267.36,4752,2614
this is just part of the csv file , more rows keep adding as time passes
EDIT 2:
answer suggested by Sparkofska seems to work but somehow it ends up giving an empty row in between every line like this:
Time,ShortPer,LongPer,ShortVolume,LongVolume,ShortPosition,LongPosition
05:47,44,56,19528.8,24789.27,65223,48630
05:48,44,56,19529.04,24789.27,65223,48633
code :
EURUSD=Outlook_data[0]
SHORTPERC=EURUSD['shortPercentage']
LONGPERC =EURUSD['longPercentage']
SHORTvolume=EURUSD['shortVolume']
longVolume=EURUSD['longVolume']
longPositions=EURUSD['longPositions']
shortPositions=EURUSD['shortPositions']
filename='EURUSD.csv';
def write_row_header_aware(filename, row):
if not os.path.exists(filename) or os.stat(filename).st_size == 0:
with open(filename, 'a') as file:
writer = csv.writer(file)
writer.writerow(['Time', 'ShortPer', 'LongPer','ShortVolume','LongVolume','ShortPosition','LongPosition'])
with open(filename, 'a') as file:
writer = csv.writer(file)
writer.writerow([current_time,SHORTPERC, LONGPERC, SHORTvolume, longVolume, longPositions, shortPositions])
write_row_header_aware(filename, [current_time,SHORTPERC, LONGPERC, SHORTvolume, longVolume, longPositions, shortPositions])
print("done...")

You could wrap the writerow function to have it automatically add the header if needed.
If your output csv file is not empty, we can assert the header was already written and simply append the row. Otherwise (file not exist or empty) we write the header before appending the row.
import os
def write_row_header_aware(filename, row):
# in case file doesn't exist or is empty
if not os.path.exists(filename) or os.stat(filename).st_size == 0:
# write header
with open(filename, 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow(['current_time', 'SHORTPERC', 'LONGPERC', ...])
# write line as usual
with open(filename, 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow(row)
write_row_header_aware('data.csv', [current_time, SHORTPERC, LONGPERC, ...])

Please make a check to know if the file exists, if it already exists use append to write rows to file else write the headers. By this way you could avoid writing headers multiple times. Please refer to this link

Related

Python 3 dictwriter appending rows to csv blank line in Windows

I have a CSV file which is created once a day based on a webhook, and then saved to S3. Any future webhooks will open the file and create an entry before closing the file.
def create_new_file(self, row, file_name):
with open(f"/tmp/{file_name}", 'w', encoding='UTF8', newline='') as f:
writer = csv.DictWriter(f, fieldnames=self.fieldnames)
writer.writeheader()
writer.writerow(row)
self.s3.upload_file(f"/tmp/{file_name}", self.bucket_name, file_name)
def append_to_file(self, row, file_name):
self.s3.download_file(self.bucket_name, file_name, f"/tmp/{file_name}")
with open(f"/tmp/{file_name}", 'r+', encoding='UTF8', newline='') as f:
last_number = f.readlines()[-1].split(",")[0]
writer = csv.DictWriter(f, fieldnames=self.fieldnames)
writer.writerow(row)
I'm encountering the common blank line issue with Python3, but only when appending items. There is no blank line between the header and the first entry so I am thinking it's something to do with how I am opening the existing file - at the moment it is in r+ mode. Am I using the wrong mode type?

Converting a large CSV file to multiple JSON files using Python

I am currently using the following code to convert a large CSV file to a JSON file.
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
jsonArray = []
with open(csvFilePath, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for row in csvReader:
jsonArray.append(row)
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonString = json.dumps(jsonArray, indent=4)
jsonf.write(jsonString)
csvFilePath = r'test_data.csv'
jsonFilePath = r'test_data.json'
csv_to_json(csvFilePath, jsonFilePath)
This code works fine and I am able to convert the CSV to JSON without any issues. However, as the CSV file contains 600,000+ rows and hence as many items in my JSON, it has become very difficult to manage the JSON file.
I would like to modify my above code such that for every 5000 rows of the CSV, the data is written into a new JSON file. Ideally, I would be having 120 (600,000/5000) JSON files in this case.
How can I do the same?
Split up your read\write methods and add a simple threshold:
JSON_ENTRIES_THRESHOLD = 5000 # modify to whatever you see suitable
def write_json(json_array, filename):
with open(filename, 'w', encoding='utf-8') as jsonf:
json.dump(json_array, jsonf) # note the usage of .dump directly to a file descriptor
def csv_to_json(csvFilePath, jsonFilePath):
jsonArray = []
with open(csvFilePath, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
filename_index = 0
for row in csvReader:
jsonArray.append(row)
if len(jsonArray) >= JSON_ENTRIES_THRESHOLD:
# if we reached the treshold, write out
write_json(jsonArray, f"jsonFilePath-{filename_index}.json")
filename_index += 1
jsonArray = []
# Finally, write out the remainder
write_json(jsonArray, f"jsonFilePath-{filename_index}.json")

Why is this code still adding blank lines between data in a csv file?

The following code is intended to remove tags from a csv file. Tags are labelled as follows (the tag here is hot as an example): tag,hot
It iterates through the current csv file, writing every line to a temporary file except the tag that needs to be removed, then renames the temporary file on top of the original. Currently, it adds whitespace between every line in the original, eg:
tag,tag1
tag,tag2
tag,tag3
tag,tag4
tag,tag5
goes to the following if tag3 was passed as the tag to be removed:
tag,tag1
tag,tag2
tag,tag4
tag,tag5
As can be seen an extra line is where tag3 would have been, as well as there being blank lines between every other line. I have seen other threads that suggested adding newline='' to when the files are opened, but I have done this and it still does not solve the problem (although there was actually more whitespace before, with 2 lines between every value rather than 1). Not sure why this hasn't fixed it.
Code:
tempfile = NamedTemporaryFile(mode="w", delete=False, newline='')
CSVpart = imageNumber + ".csv"
lowercase = tag.lower()
filePath = os.path.join(os.curdir, "static", "csv", CSVpart)
if os.path.isfile(filePath):
with open(filePath, 'r', newline='') as csvfile, tempfile:
reader = csv.reader(csvfile, delimiter=',')
writer = csv.writer(tempfile)
for row in reader:
if row != ['tag', lowercase]:
writer.writerow(row)
else:
removed = True //ignore, this makes sense in context
shutil.move(tempfile.name, filePath)
I created the following, which works. Cannot figure out what is different about it:
import shutil, csv, os
from tempfile import NamedTemporaryFile
tempfile = NamedTemporaryFile(mode="w", delete=False, newline='')
tagToRemove = "hot"
filePath = os.path.join(os.curdir, "test.csv")
with open(filePath, 'r', newline='') as csvfile, tempfile:
reader = csv.reader(csvfile, delimiter=',')
writer = csv.writer(tempfile)
for row in reader:
if row != ['tag', tagToRemove]:
writer.writerow(row)
shutil.move(tempfile.name, filePath)

Python read CSV file columns and write file name and column name in a csv file

I have many CSV files, need to read all the files in loop and write file name and all the columns (header in row 1) in an output file.
Example
Input csv file 1 (test1.csv)
Id, Name, Age, Location
1, A, 25, India
Input csv file 2 (test2.csv)
Id, ProductName
1, ABC
Outputfile
test1.csv Id
test1.csv Name
test1.csv Age
test1.csv Location
test2.csv Id
test2.csv ProductName
Many thanks for your help.
Update:
This code works fine for this purpose:
import os
import csv
ofile = open('D:\Anuj\Personal\OutputFile/AHS_File_Columns_Info.csv', 'w')
directory = os.path.join('D:\Anuj\Personal\Python')
for root, dirs, files in os.walk(directory):
for file in files:
fullfilepath = directory + "/" + file
with open(fullfilepath,'r') as f:
output = file +','+ f.readline()
ofile.write(output)
clean solution using csv module for reading and writing
open output file and create a csv.writer instance on its handle
open each input file and create a csv.reader instance on their handle
get first row using next on the csv.reader iterator: gets titles as list (with a small post-processing to remove the spaces)
write titles alongside the current filename in a loop
code:
import csv
files=["test1.csv","test2.csv"]
with open("output.tsv","w",newline='') as fw:
cw = csv.writer(fw,delimiter="\t") # output is tab delimited
for filename in files:
with open(filename,'r') as f:
cr = csv.reader(f)
# get title
for column_name in (x.strip() for x in next(cr)):
cw.writerow([filename,column_name])
There are several advantages using csv module, the most important being that quoting & multi-line fields/titles are managed properly.
But I'm not sure I understand you correctly.
import csv
from typing import List
from typing import Tuple
TableType = List[List[str]]
def load_csv_table(file_name: str) -> Tuple[List[str], TableType]:
with open(file_name) as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
headers = next(csv_reader)
data_table = list(csv_reader)
return headers, data_table
def save_csv_table(file_name: str, headers: List[str], data_table: TableType):
with open(file_name, 'w', newline='') as csv_file:
writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
writer.writerow(headers)
for row in data_table:
writer.writerow(row)
input_files = ['file1.csv', 'file2.csv', 'file3.csv']
new_table = []
new_headers = []
for file_name in input_files:
headers, data_table = load_csv_table(file_name)
if not new_headers:
new_headers = ['Source'] + headers
new_table.extend(([file_name] + line for line in data_table))
save_csv_table('output.csv', new_headers, new_table)
A simple method is to use readline() on the file object:
files=["test1.csv","test2.csv"]
for my_file in files:
with open(my_file,'r') as f:
print my_file, f.readline()

JSON like data to CSV file in python - not showing headers correctly

I am transforming JSON like data to CSV and having a few issues.
The code is here:
import json
import csv
def parse_file(inputed_file):
with open(input_file, 'r') as inputed_file:
content = inputed_file.readlines()
split_file = open('test.csv', 'w')
for line in content:
lines = line.split('\t')
data = json.loads(lines[0])
writer = csv.DictWriter(split_file, fieldnames = ["title", "firstname"], delimiter = ',')
writer.writeheader()
The problem is this is adding a header on each row for the data, I want to only have the header displayed once. Then add this for the data to go below the headers:
writer.writerow(data)
I have looked at this and tried it but failed: How can I convert JSON to CSV?.
Create the DictWriter outside the loop, and just call writer.writeheader() there. Then call writer.writerow() inside the loop.
def parse_file(inputed_file):
with open(input_file, 'r') as inputed_file:
content = inputed_file.readlines()
split_file = open('test.csv', 'w')
writer = csv.DictWriter(split_file, fieldnames = ["title", "firstname"], delimiter = ',')
writer.writeheader()
for line in content:
lines = line.split('\t')
data = json.loads(lines[0])
writer.writerow(data)

Categories

Resources