Working with SVG paths in BeautifulSoup & Python

Working with SVG paths in BeautifulSoup & Python - python

I'm writing a Python script that will color in various areas of my city's Census Block Groups (of which there are 18) different colors according to their respective median household incomes on a map that's in the SVG format.
Sounds simple enough, right? Well, I can't figure out how, though I'm making slight progress. What I've tried so far is making a list of each of the block group paths according to how the SVG references them, making a list of the median household incomes, then passing in the code that colors them. However, this just.. doesn't seem to be working, for whatever reason. Can any of you wonderful people help figure out where I'm misfiring?
import csv
from bs4 import BeautifulSoup
icbg = []
reader = csv.reader(open('censusdata.csv'),delimiter=",")
#read and get income
for row in reader:
income = row[6]
income = int(income)
icbg.append(income)
svg = open('NM2.svg','r')
soup = BeautifulSoup(svg,"lxml")
#find CBGs and incomes
path1 = soup.find('path')
path2 = path1.find_next('path')
path3 = path2.find_next('path')
path4 = path3.find_next('path')
path5 = path4.find_next('path')
path6 = path5.find_next('path')
path7 = path6.find_next('path')
path8 = path7.find_next('path')
path9 = path8.find_next('path')
path10 = path9.find_next('path')
path11 = path10.find_next('path')
path12 = path11.find_next('path')
path13 = path12.find_next('path')
path14 = path13.find_next('path')
path15 = path14.find_next('path')
path16 = path15.find_next('path')
path17 = path16.find_next('path')
path18 = path17.find_next('path')
incomep1 = icbg[0]
incomep2 = icbg[1]
incomep3 = icbg[2]
incomep4 = icbg[3]
incomep5 = icbg[4]
incomep6 = icbg[5]
incomep7 = icbg[6]
incomep8 = icbg[7]
incomep9 = icbg[8]
incomep10 = icbg[9]
incomep11 = icbg[10]
incomep12 = icbg[11]
incomep13 = icbg[12]
incomep14 = icbg[13]
incomep15 = icbg[14]
incomep16 = icbg[15]
incomep17 = icbg[16]
incomep18 = icbg[17]
paths = (path1, path2, path3, path4, path5, path6, path7, path8, path9, path10,
path11, path12, path13, path14, path15, path16, path17, path18)
incomes = (incomep1,incomep2,incomep3,incomep4,incomep5,incomep6,incomep7,incomep8,
incomep9,incomep10,incomep11,incomep12,incomep13,incomep14,incomep15,incomep16,incomep17,incomep18)
#set colors
colors = ['fee5d9','fcae91','fb6a4a','de2d26','a50f15']
for p in paths:
for i in range(0,17):
it = incomes[i]
if it > 20000:
color_class = 2
elif it > 25000:
color_class = 1
elif it > 30000:
color_class = 3
elif it > 35000:
color_class = 4
color = colors[color_class]
path_style = "font-size:12px;fill:#%s;fill-rule:nonzero;stroke:#000000;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel" % color
p['style'] = path_style
print(soup.prettify())
Running this gives me an SVG file like so: fill:#fb6a4a;fill-rule:nonzero;stroke:#000000;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel"> comes up 18 times, meaning for every available path, even though these paths have different incomes.
could the problem be with the way I wrote my comparisons?

From my understanding of what you are trying to do, your problem is that you have 2 for loops instead of one. You should loop through the paths and incomes at the same time. The way you are doing it now is you are looping through all the incomes for each path. The following code simply moves the paths into the same loop as the income so they are looped through at the same time.
for i in range(0,17):
it = incomes[i]
p = paths[i]
if it > 20000:
color_class = 2
elif it > 25000:
color_class = 1
elif it > 30000:
color_class = 3
elif it > 35000:
color_class = 4
color = colors[color_class]
path_style = "font-size:12px;fill:#%s;fill-rule:nonzero;stroke:#000000;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel" % color
p['style'] = path_style

Related

Problem with python code for Keithley 2450, pymeasure "apply current" does not work

I'm trying to make a python program to communicate with my keithley2450 sourcemeter but I can't impose a current.
Let me explain I manage to communicate with the keithley2450, and I would like to impose a current for a period of time that I define and measure the resistance and the voltage.
But I can't impose a current.
from pymeasure.instruments.keithley import Keithley2450
import time
import openpyxl
t_acquisition = 10
tempsreel = 0
import pyvisa
rm = pyvisa.ResourceManager()
print(rm.list_resources())
wb = openpyxl.Workbook()
sheet = wb.active
try:
keithley = Keithley2450('USB0::0x05E6::0x2450::04490659::0::INSTR')
keithley.apply_current() # Sets up to source current
keithley.source_current_range = 10e-3 # Sets the source current range to 10 mA
keithley.compliance_voltage = 10 # Sets the compliance voltage to 10 V
keithley.source_current = 0 # Sets the source current to 0 mA
keithley.enable_source() # Enables the source output
keithley.measure_resistance()
keithley.measure_voltage()
keithley.ramp_to_current(5e-3) # Ramps the current to 5 mA
except:
raise ConnectionError("Unable to connect to the Keithley 2450.")
list_current = []
list_voltage = []
list_resistance = []
liste_temps_mesure = []
liste_temps = []
while int(tempsreel) < t_acquisition :
#list_current.append(keithley.current)
list_voltage.append(keithley.voltage)
list_resistance.append(keithley.resistance)
temps = time.time()
liste_temps_mesure.append(temps)
tempsreel = temps - liste_temps_mesure[0]
liste_temps.append(tempsreel)
print(f"{tempsreel}, {keithley.resistance}, {keithley.voltage}")
sheet['A'+str(1)].value = 'Temps (s)'
#sheet['B'+str(1)].value = 'Current (A)'
sheet['C'+str(1)].value = 'Voltage (V)'
sheet['D'+str(1)].value = 'Resistance'
for i in range(0,len(liste_temps)):
sheet['A'+str(i+2)].value = liste_temps[i]
#sheet['B'+str(i+2)].value = list_current[i]
sheet['C'+str(i+2)].value = list_voltage[i]
sheet['D'+str(i+2)].value = list_resistance[i]
wb.save('Data_sourcemetre.xlsx')
print('Data saved !')

IndexError: list index out of range (Pygame)

I am new to pygame and I am trying to code a game and with it I am using an engine by a youtuber as they used it in their tutorial. Everyhting is working fine but for some reason I have a problem with loading animations. It shows "IndexError: list index out of range" and says that to
animation_id = entity_info[1]
Does anyone know how to fix this? Here is the code to the 2 problems:
global animation_database
animation_database = {}
global animation_higher_database
animation_higher_database = {}
def load_animations(path):
global animation_higher_database, e_colorkey
f = open(path + 'entity_animations.txt','r')
data = f.read()
f.close()
for animation in data.split('\n'):
sections = animation.split(' ')
anim_path = sections[0]
entity_info = anim_path.split('/')
entity_type = entity_info[0]
animation_id = entity_info[1]
timings = sections[1].split(';')
tags = sections[2].split(';')
sequence = []
n = 0
timing: str
for timing in timings:
sequence.append([n, int(timing)])
n += 1
anim = animation_sequence(sequence,path + anim_path,e_colorkey)
if entity_type not in animation_higher_database:
animation_higher_database[entity_type] = {}
animation_higher_database[entity_type][animation_id] = [anim.copy(),tags]
This part is my code:
e.load_animations(r'C:/Users/zo432/PycharmProjects/New game/data/images/entities/')
If you need of the other code just message I can give you.

Keyerror none of are in the [index]

im trying to write a Collaborative Filter for my movie recommendation engine. I need a mapping dictionary for fuzzy matching. I tried to write a movieId-title matrix but i got an exception like that:
Keyerror none of are in the [index] in enumerate line.
I checked matrices, id's etc. But i couldn't find solution. (I am using The Movies Dataset from kaggle).
movies_metadata = pd.DataFrame(pd.read_csv('movies_metadata.csv'))
ratings = pd.DataFrame(pd.read_csv('ratings_small.csv'))
movies = movies_metadata[['id','title']]
movies['movieId'] = movies['id']
movies = movies.drop('id',axis = 1)
ratings = ratings.drop('timestamp', axis=1)
user_nums = len(ratings.userId.unique())
item_nums = len(movies.movieId.unique())
total_count = user_nums * item_nums
rating_zero = total_count - ratings.shape[0]
ratings_count_temp = pd.DataFrame(ratings.groupby('rating').size(), columns = ['count'])
ratings_count = ratings_count_temp.append(pd.DataFrame({'count': rating_zero}, index=[0.0]),verify_integrity=True,).sort_index()
ratings_count['log_count'] = np.log(ratings_count['count'])
movies_count = pd.DataFrame(ratings.groupby('movieId').size(), columns = ['count'])
movies_count.head()
movies_count['count'].quantile(np.arange(1, 0.6, -0.05))
popular_movies = list(set(movies_count.query('count >= 50').index))
popular_ratings = ratings[ratings.movieId.isin(popular_movies)]
users_count = pd.DataFrame(popular_ratings.groupby('userId').size(), columns=['count'])
users_count['count'].quantile(np.arange(1, 0.5, -0.05))
active_users = list(set(users_count.query('count >= 50').index))
popular_active_ratings = popular_ratings[popular_ratings.userId.isin(active_users)]
movie_user_mat = popular_active_ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
movie_to_idx = {
movies: i for i, movies in
enumerate(list(movies.set_index('movieId').loc[movie_user_mat.index].title))
}

How to read large NetCDF data sets without using a for - Python

Good morning, I have a problem when reading a large netCDF file in python, which contains meteorological information, that information must go through it to assemble the information and then insert it into the database, but the time it takes to go through and assemble the information is too much, I know there must be other ways to perform the same process more efficiently, currently I access the information through a for loop, below the code
content = nc.Dataset(pathFile+file)
XLONG, XLAT = content.variables["XLONG"], content.variables["XLAT"]
Times = content.variables["Times"] #Horas formar b 'b
RAINC = content.variables["RAINC"] #Lluvia
Q2 = content.variables["Q2"] #Humedad especifica
T2 = content.variables["T2"] #Temperatura
U10 = content.variables["U10"] #Viento zonal
V10 = content.variables["V10"] #Viento meridional
SWDOWN = content.variables["SWDOWN"] #Radiacion incidente
PSFC = content.variables["PSFC"] #Presion de la superficie
SST = content.variables["SST"] #Temperatura de la superficie del mar
CLDFRA = content.variables["CLDFRA"] #Fraccion de nubes
for c2 in range(len(XLONG[0])):
for c3 in range(len(XLONG[0][c2])):
position += 1
for hour in range(len(Times)):
dateH = getDatetimeInit(dateFormatFile.hour) if hour == 0 else getDatetimeForHour(hour, dateFormatFile.hour)
hourUTC = getHourUTC(hour)
RAINH = str(RAINC[hour][0][c2][c3])
Q2H = str(Q2[hour][0][c2][c3])
T2H = str(convertKelvinToCelsius(T2[hour][0][c2][c3]))
U10H = str(U10[hour][0][c2][c3])
V10H = str(V10[hour][0][c2][c3])
SWDOWNH = str(SWDOWN[hour][0][c2][c3])
PSFCH = str(PSFC[hour][0][c2][c3])
SSTH = str(SST[hour][0][c2][c3])
CLDFRAH = str(CLDFRA[hour][0][c2][c3] )
rowData = [idRun, functions.IDMODEL, idTime, position, dateH.year, dateH.month, dateH.day, dateH.hour, RAINH, Q2H, T2H, U10H, V10H, SWDOWNH, PSFCH, SSTH, CLDFRAH]
dataProcess.append(rowData)

I would use NumPy. Let us assume you have netCDF with 2 variables, "t2" and "slp". Then you could use the following code to vectorize your data:
#!//usr/bin/env ipython
# ---------------------
import numpy as np
from netCDF4 import Dataset
# ---------------------
filein = 'test.nc'
ncin = Dataset(filein);
tair = ncin.variables['t2'][:];
slp = ncin.variables['slp'][:];
ncin.close();
# -------------------------
tairseries = np.reshape(tair,(np.size(tair),1));
slpseries = np.reshape(slp,(np.size(slp),1));
# --------------------------
## if you want characters:
#tairseries = np.array([str(val) for val in tairseries]);
#slpseries = np.array([str(val) for val in slpseries]);
# --------------------------
rowdata = np.concatenate((tairseries,slpseries),axis=1);
# if you want characters, do this in the end:
row_asstrings = [[str(vv) for vv in val] for val in rowdata]
# ---------------------------
Nevertheless, I have a feeling that using strings is not very good idea. In my example, the conversion from numerical arrays to strings, took quite long time and therefore I did not implement it before concatenation.
If you want also some time/location information, you can do like this:
#!//usr/bin/env ipython
# ---------------------
import numpy as np
from netCDF4 import Dataset
# ---------------------
filein = 'test.nc'
ncin = Dataset(filein);
xin = ncin.variables['lon'][:]
yin = ncin.variables['lat'][:]
timein = ncin.variables['time'][:]
tair = ncin.variables['t2'][:];
slp = ncin.variables['slp'][:];
ncin.close();
# -------------------------
tairseries = np.reshape(tair,(np.size(tair),1));
slpseries = np.reshape(slp,(np.size(slp),1));
# --------------------------
## if you want characters:
#tairseries = np.array([str(val) for val in tairseries]);
#slpseries = np.array([str(val) for val in slpseries]);
# --------------------------
rowdata = np.concatenate((tairseries,slpseries),axis=1);
# if you want characters, do this in the end:
#row_asstrings = [[str(vv) for vv in val] for val in rowdata]
# ---------------------------
# =========================================================
nx = np.size(xin);ny = np.size(yin);ntime = np.size(timein);
xm,ym = np.meshgrid(xin,yin);
xmt = np.tile(xm,(ntime,1,1));ymt = np.tile(ym,(ntime,1,1))
timem = np.tile(timein[:,np.newaxis,np.newaxis],(1,ny,nx));
xvec = np.reshape(xmt,(np.size(tair),1));yvec = np.reshape(ymt,(np.size(tair),1));timevec = np.reshape(timem,(np.size(tair),1)); # to make sure that array's size match, I am using the size of one of the variables
rowdata = np.concatenate((xvec,yvec,timevec,tairseries,slpseries),axis=1);
In any case, with variable sizes (744,150,150), it took less than 2 seconds to vectorize 2 variables.

Pandas Dataframe Only Returning first Row of JSON Data

I'm working on a web scraping project, and have all the right code that returns me the json data in the format that I want if I used the #print command below, but when I got to run the same code except through Pandas Dataframe it only returns the first row of Data that I'm looking for. Just running the print, it returns the expected 17 rows of data I'm looking for. Dataframe to CSV gives me the first row only. Totally stumped! So grateful for anyone's help!
for item in response['body']:
DepartureDate = item['legs'][0][0]['departDate']
ReturnDate = item['legs'][1][0]['departDate']
Airline = item['legs'][0][0]['airline']['code']
Origin = item['legs'][0][0]['depart']
Destination = item['legs'][0][0]['destination']
OD = (Origin + Destination)
TrueBaseFare = item['breakdown']['baseFareAmount']
YQYR = item['breakdown']['fuelSurcharge']
TAX = item['breakdown']['totalTax']
TTL = item['breakdown']['totalFareAmount']
MARKEDUPTTL = item['breakdown']['totalCalculatedFareAmount']
MARKUP = ((MARKEDUPTTL - TTL) / (TTL)*100)
FBC = item['fareBasisCode']
#print(DepartureDate,ReturnDate,Airline,OD,TrueBaseFare,YQYR,TAX,TTL,MARKEDUPTTL,MARKUP,FBC)
MI = pd.DataFrame(
{'Dept': [DepartureDate],
'Ret': [ReturnDate],
'AirlineCode': [Airline],
'Routing': [OD],
'RealFare': [TrueBaseFare],
'Fuel': [YQYR],
'Taxes': [TAX],
'RealTotal': [TTL],
'AgencyTotal': [MARKEDUPTTL],
'Margin': [MARKUP],
'FareBasis': [FBC],
})
df = pd.DataFrame(MI)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
df.to_csv('MITest7.csv')

When you print all your values after the cycle, you will see that you get only the last values. To resolve this problem you need to create lists and put there your values.
Try this:
DepartureDate = []
ReturnDate = []
Airline = []
Origin = []
Destination = []
OD = []
TrueBaseFare = []
YQYR = []
TAX = []
TTL = []
MARKEDUPTTL = []
MARKUP = []
FBC = []
for item in response['body']:
DepartureDate.append(item['legs'][0][0]['departDate'])
ReturnDate.append(item['legs'][1][0]['departDate'])
Airline.append(item['legs'][0][0]['airline']['code'])
Origin.append(item['legs'][0][0]['depart'])
Destination.append(item['legs'][0][0]['destination'])
OD.append((Origin[-1] + Destination[-1]))
TrueBaseFare.append(item['breakdown']['baseFareAmount'])
YQYR.append(item['breakdown']['fuelSurcharge'])
TAX.append(item['breakdown']['totalTax'])
TTL.append(item['breakdown']['totalFareAmount'])
MARKEDUPTTL.append(item['breakdown']['totalCalculatedFareAmount'])
MARKUP.append(((MARKEDUPTTL[-1] - TTL[-1]) / (TTL[-1])*100))
FBC.append(item['fareBasisCode'])

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Working with SVG paths in BeautifulSoup & Python - python

Related

Problem with python code for Keithley 2450, pymeasure "apply current" does not work

IndexError: list index out of range (Pygame)

Keyerror none of are in the [index]

How to read large NetCDF data sets without using a for - Python

Pandas Dataframe Only Returning first Row of JSON Data

Categories

Resources