Outputting data to csv in python

Outputting data to csv in python - python

I have data that this script outputs as a scatterplot of ID_all[:,0],ID_all[:,1], but I want to also save the data used for the scatter plot as two columns (as a csv file). Any help really appreciated! Thanks in advance.
More Info:
Here is a link to the initial csv data so that you can see my problem (only first two columns used).
https://www.dropbox.com/s/966iviv01tccv1k/contacts.csv?dl=0
If you plot the input csv file (see figure 1), you will see that it only has half of the values that I need. So when I plot this with the below script, I transpose the 'matrix', making a mirror image of this vrt the main diagonal. Now with this script I plot both the original and the transpose (figure 2), but I somehow cannot figure out how to just output columns of data in addition to the figure.
import numpy as np
import matplotlib.pyplot as plt
def parsefile(filename):
i = 0
print('Opening File')
try:
for line in open(filename):
line = line.split()
ID = np.hstack((int(line[0]), int(line[1])))
if i==0:
ID_list = ID
i+=1
if i>0:
ID_list = np.vstack((ID_list, ID))
print('File Opened')
return ID_list
except:
return ID_list
def get_scatter_data(ID_list):
ID_swapped1 = ID_list[:, 1]
ID_swapped2 = ID_list[:, 0]
ID_swapped = np.transpose([ID_swapped1.T, ID_swapped2.T])
N = np.max(ID_list)
diagonal = np.linspace(1,N,N)
ID_diagonal = np.transpose([diagonal.T,diagonal.T])
ID_all = np.vstack((ID_list,ID_swapped,ID_diagonal))
return ID_all
def plot_contactmap(ID_all, savefilename):
f = plt.figure()
plt.scatter(ID_all[:,0],ID_all[:,1], marker='o', s=0.01)
plt.axis('square')
plt.xlabel('Residue Number', fontsize = 14)
plt.ylabel('Residue Number', fontsize = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.show()
f.savefig(savefilename+'.png', bbox_inches='tight')
f.savefig(savefilename+'.pdf', bbox_inches='tight')
def main(filename, savefilename):
ID_list = parsefile(filename+'.csv')
ID_all = get_scatter_data(ID_list)
plot_contactmap(ID_all, savefilename)
main('contacts', 'test')

You can save using numpy.savetxt() method
import numpy as np
ID_all = np.array([[1,2],[3,4],[5,6]])
np.savetxt('output.csv', ID_all, delimiter=',', fmt="%d")
If you want to give column names you can use header argument np.savetxt() method.
np.savetxt('output.csv', ID_all, delimiter=',', header='first, second', fmt="%d")

Related

problem in plotting multiple lists using matplotlib

I am writing a script which can be used to plot the country wise covid time-series data. It is working fine when I plot a single country but The scale at Y-axis is in appropriately printed.
Plot which I am getting The Problem is after printing the maximum value for one country the y axis is extrapolated with smaller values to plot the data points of subsequent countries.
The code for my script is as follows
import requests
from contextlib import closing
import csv
import matplotlib.pyplot as plt
url = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
def prepareCountryWiseData(country):
countryWise = {}
with closing(requests.get(url, stream=True)) as r:
f = (line.decode('utf-8') for line in r.iter_lines())
reader = csv.reader(f, delimiter=',', quotechar='"')
active = []
recovered = []
dates = []
for row in reader:
if row[1] == country:
dates.append(row[0])
active.append(row[2])
recovered.append(row[3])
return (dates, active, recovered)
def plotCountryWiseData(countryList):
plotable = []
for country in countryList:
dates,active,recovered = (prepareCountryWiseData(country))
plt.plot(active)
plt.ylabel('active_cases')
plt.legend(countryList)
plt.show()
plotCountryWiseData(['India','US','Italy'])

If you can use the pandas module your job would be much easier:
import pandas as pd, matplotlib.pyplot as plt
url = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
df = pd.read_csv(url)
fig,ax = plt.subplots()
for k,g in df[df['Country'].isin(['India','US','Italy'])].groupby('Country'):
ax = g.plot(ax=ax,kind='line',x='Date',y='Confirmed',label=k)
plt.gcf().suptitle('Active Cases')
plt.show()
Result:

Plotting 3 diffrent coloums from a CSV file python

My goal is to use the sorted result data to plot "Month vs Mean Temp" graph for each year on the same window.
I've sorted the first two columns that have the year and the month respectively and then saved the new sorted data into a file called NewFile, but I can't seem to get to a solution here, I used csv reader and now I'm using numpy,
Code:
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
csv1 = open('Data_5.1.csv')
data = np.array(list(csv.reader(csv1,delimiter=',').astype("string")
year = data[:,0]
mounth = data[:,1]
temp= data[:,3]
fig, ax = plt.subplots(figsize=(10,10))
ax.plot(year, mounth, label='mounth/year')
ax.plot(year, temp, label='year/temp')
plt.legend()
But it just throws an error saying:
File "<ipython-input-282-282e91df631f>", line 9
year = data[:,0]
^
SyntaxError: invalid syntax
I will put two links to the files, the Data_5.1 and the NewFile respectively
Data_5.1
NewFile

1 - You didn't close brackets in line 6, hence you are getting the error in line 8.
2 - astype("string") is not needed in line 6.
I fixed your code, but you will have to complete the subplotting. Good luck!
import numpy as np
import matplotlib.pyplot as plt
import csv
plt.style.use('ggplot')
csv1 = open('Data_5.1.csv')
data = np.array(list(csv.reader(csv1,delimiter=',')))
year = data[:,0]
mounth = data[:,1]
temp= data[:,3]
fig, ax = plt.subplots(2,2) #This will create 4X4 subplots in one window
ax[0,0].plot(year, mounth, label='mounth/year') #This will plot in the 0,0 subplot
ax[0,1].plot(year, temp, label='year/temp') #This will plot in the 0,1 subplot
'''
For you to continue.
'''
plt.legend()
plt.show()

Your data is in a CSV file, and it's non-homogenous in type. Pandas is really the more appropriate tool for this.
I had to adapt your CSV slightly due to encoding errors, here is what it ended up looking like:
year,Month,Other Month,temperature_C
2003,Jan.,Some val,17.7
2004,Jan.,Some val,19.5
2005,Jan.,Some val,17.3
2006,Jan.,Some val,17.8
...
Here is a general sketch of what the code you shared could look like after the refactoring:
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('ggplot')
# csv1 = open('Data_5.1.csv')
# data = np.array(list(csv.reader(csv1,delimiter=',').astype("string")
df_1 = pd.read_csv('../resources/Data_5.1.csv', header=0, names=['year', 'month', 'some_col', 'temp'],
dtype={'some_col': str, 'temp': float, 'month': str, 'year': str})
year = df_1['year']
month = df_1['month']
temp = df_1['temp']
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(year, month, label='month/year')
ax.plot(year, temp, label='year/temp')
plt.show()
Let me know if you have any questions :)

Reading list from CSV and convert to float list

I want to do some math on my data and stuck with it.
I am reading string lists from csv files and can plot them so far.
Now I am trying to convert the string lists to float lists as i want to do some math on the list items and create new lists and plot them.
import numpy as np
import math
import matplotlib.pyplot as plt
with open("testlog.csv") as f:
data = f.read()
data = data.split('\n')
time = [row.split(',')[0] for row in data]
gyro_x_lsb = [row.split(',')[1] for row in data]
gyro_y = [row.split(',')[2] for row in data]
gyro_z = [row.split(',')[3] for row in data]
accel_x = [row.split(',')[4] for row in data]
accel_y = [row.split(',')[5] for row in data]
accel_z = [row.split(',')[6] for row in data]
comp_x = [row.split(',')[7] for row in data]
comp_y = [row.split(',')[8] for row in data]
comp_z = [row.split(',')[9] for row in data]
temp = [row.split(',')[10] for row in data]
gyro_x_lsb = float(gyro_x_lsb)# make floats in a new list
gyro_x_dps = [gyro_x_lsb / (32768*2000) for gyro_x_dps_f in gyro_x_lsb]
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax1.set_title("Gyro X AR [LSB]")
ax1.set_xlabel('Time[ms]')
ax1.set_ylabel('AR [LSB]')
ax1.plot(time,gyro_x_lsb, c='r', label='X')
leg = ax1.legend()
ax2 = fig.add_subplot(212)
ax2.set_title("MPU9250_test_Accel")
ax2.set_xlabel('Time[ms]')
ax2.set_ylabel('Acceleration')
ax2.plot(time,accel_x, c='r', label='Accel_X')
leg = ax2.legend()
plt.show()
I am trying to calculate every item in "gyro_x_lsb"/32768*2000. But it will not work.
I have tried map also already.
Thanks in advance...
TMP36
BTW: I use Anaconda 3(2.5.0) with Python 3.5
I am new to Python and to this forum.

List comprehensions will do a lot for you.
Like:
gyro_x_lsb_sum = sum(float(item) for item in gyro_x_lsb)
Here I am using a generator expression which is the same syntax as a list comprehension, but better performance in this case.
Hope it helps a bit.
PS. Consider using the Python csv module to read CSV files.

Thanks for the hints with the CSV module. This read my data now as tuple. This was also new to me, and I found a possibility to make some math on my data. Here is the code now that works for me, maybe not perfect, but i am happy for now.
import csv
import matplotlib.pyplot as plt
with open("testlog.csv") as data:
#reader = csv.reader(data) #read columns as strings
reader = csv.reader(data, quoting=csv.QUOTE_NONNUMERIC) #read columns as numbers
time, gyro_x, gyro_y, gyro_z, accel_x, accel_y, accel_z, comp_x, comp_y, comp_z, temp = zip(*reader)
#gyro_x_dps = gyro_x
def gyro_x_dps(gyro_x):
return tuple( [e / 65536000 for e in gyro_x])
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax1.set_title("Gyro X AR [LSB]")
ax1.set_xlabel('Time[ms]')
ax1.set_ylabel('AR [LSB]')
ax1.plot(time,gyro_x, c='r', label='X')
leg = ax1.legend()
ax2 = fig.add_subplot(212)
ax2.set_title("Gyro X AR [dps]")
ax2.set_xlabel('Time[ms]')
ax2.set_ylabel('AR [dps]')
ax2.plot(time,gyro_x_dps(gyro_x), c='r', label='X')
leg = ax2.legend()
plt.show()

plotting multiple plots generated inside a for loop on the same axes python

My code is as follows, the problem is instead of having one plot, I get 242 plots. I tried putting the plt.show() outside the loop, it didn't work.
import numpy as np
import matplotlib.pyplot as plt
import csv
names = list()
with open('selected.csv','rb') as infile:
reader = csv.reader(infile, delimiter = ' ')
for row in reader:
names.append(row[0])
names.pop(0)
for j in range(len(names)):
filename = '/home/mh/Masters_Project/Sigma/%s.dat' %(names[j])
average, sigma = np.loadtxt(filename, usecols = (0,1), unpack = True, delimiter = ' ')
name = '%s' %(names[j])
plt.figure()
plt.xlabel('Magnitude(average)', fontsize = 16)
plt.ylabel('$\sigma$', fontsize = 16)
plt.plot(average, sigma, marker = '+', linestyle = '', label = name)
plt.legend(loc = 'best')
plt.show()

Your issue is that you're creating a new figure with every iteration using plt.figure(). Remove this line from your for loop and it should work fine, as this short example below shows.
import matplotlib.pyplot as plt
import numpy as np
x = np.arange(10)
for a in [1.0, 2.0, 3.0]:
plt.plot(x, a*x)
plt.show()

Let me improve your code a bit:
import numpy as np
import matplotlib.pyplot as plt
# set the font size globally to get the ticklabels big too:
plt.rcParams["font.size"] = 16
# use numpy to read in the names
names = np.genfromtxt("selected.csv", delimiter=" ", dtype=np.str, skiprows=1)
# not necessary butyou might want to add options to the figure
plt.figure()
# don't use a for i in range loop to loop over array elements
for name in names:
# use the format function
filename = '/home/mh/Masters_Project/Sigma/{}.dat'.format(name)
# use genfromtxt because of better error handling (missing numbers, etc)
average, sigma = np.genfromtxt(filename, usecols = (0,1), unpack = True, delimiter = ' ')
plt.xlabel('Magnitude(average)')
plt.ylabel('$\sigma$')
plt.plot(average, sigma, marker = '+', linestyle = '', label = name)
plt.legend(loc = 'best')
plt.show()

Python plot dates using matplotlib

I'm very beginner at Python and matplotlib but trying to learn! I would like to use matplotlib to plot some simple data from a CSV containing dates with a frequency. The X axis containing dates and Y containing the frequency. Example data from CSV:
2011/12/15,5
2011/12/11,4
2011/12/19,2
I checked the "matplotlib.sf.net/examples" out but appears all the test data is downloaded from a http get. I would really appreciate if someone could guide me with some example code of how to read in (presumably using CSV reader) and display data in chart.
Thank you!!

Maybe you look for something like:
import csv
import datetime as dt
import matplotlib.pyplot as plt
arch = 'C:\\Python26\\programas\\test.csv'
data = csv.reader(open(arch))
data = [(dt.datetime.strptime(item, "%Y/%m/%d"), float(value)) for item, value in data]
data.sort()
[x, y] = zip(*data)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y)
ax.grid(True)
fig.autofmt_xdate()
plt.show()

I've tried to keep my code as simple as possible and this is by no means elegant, but here you go:
import csv
import matplotlib.pyplot as plt
### Making test CSV file ###
data = [['2011/12/15,5'],['2011/12/11,4'],['2011/12/19,2'],['2011/12/16,3'],['2011/12/20,8'],['2011/12/14,4'],['2011/12/10,10'],['2011/12/9,7']]
with open('test.csv', 'wb') as f:
writer = csv.writer(f)
for i in data:
writer.writerow(i)
### Extract data from CSV ###
with open('test.csv', 'rb') as n:
reader = csv.reader(n)
dates = []
freq = []
for row in reader:
values = row[0].split(',')
dates.append(values[0])
freq.append(values[1])
### Do plot ###
false_x = [x for x in range(len(dates))]
plt.plot(false_x,freq, 'o-')
plt.xticks(range(len(dates)), (dates), rotation=45)
# plt.axis([xmin, xmax, ymin, ymax]) - sets axes limits on graph
plt.axis([-1, 8, 0, 11])
plt.show()
This makes:

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Outputting data to csv in python - python

Related

problem in plotting multiple lists using matplotlib

Plotting 3 diffrent coloums from a CSV file python

Reading list from CSV and convert to float list

plotting multiple plots generated inside a for loop on the same axes python

Python plot dates using matplotlib

Categories

Resources