I'm using Pycharm 2.2 community, currently trying to get latitude and longitude from addresses. Stored the address data in a panda data frame and tried to get its geographical coordinates. However, it returned me:
"Traceback (most recent call last):
File "C:/map.py", line 19, in <module>
geocode_result = gmaps_key.geocode(data1.iat[i, 0])" and "return self.obj._get_value(*key, takeable=self._takeable)" after execution.
When hover over geocode, the tooltip returns me "unresolved attribute reference 'geocode' for class 'Client'..."
Below is my code, any help would be much appreciated, thanks!
import pandas as pd
import googlemaps
data = pd.read_csv("listing.csv", usecols=['building_no', 'street_name'])
# df = pd.DataFrame({'Year': ['2014', '2015'], 'quarter': ['q1', 'q2']})
data['address'] = data[['building_no', 'street_name']].apply(lambda x: ' '.join(x), axis=1)
data1 = data['address']
# print data1
# Set google map API key
gmaps_key = googlemaps.Client(key="AIzaSyDjB0HJolcomNZCWrtq9gef70V4F2xtB_s")
# create Geocode result object
# get LON and LAT
data1["LAT"] = None
data1["LON"] = None
for i in range(0, len(data1), 1):
geocode_result = gmaps_key.geocode(data1.iat[i, 0])
try:
lat = geocode_result[0]["geometry"]["location"]["lat"]
lon = geocode_result[0]["geometry"]["location"]["lng"]
data1.iat[i, data1.columns.get_loc("LAT")] = lat
data1.iat[i, data1.columns.get_loc("LON")] = lon
except:
lat = None
lon = None
print data1
Related
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="My App")
#tries fetch address from geopy
location = geolocator.geocode(df2['address'])
#append lat/long to column using dataframe location
df2.loc['lat'] = location.latitude
df2.loc['long'] = location.longitude
#catches exception for the case where no value is returned
#appends null value to column
df2.loc['lat'] = ""
df2.loc['long'] = ""
df2.head()
Here is the code that I tried using ^^^
geolocator = Nominatim(user_agent="My App")
location = geolocator.geocode("33000 N KINGSHIGHWAY BLVD, St.Louis" )
print((location.latitude, location.longitude))
This Code above worked when I picked only one address. But ofc I want it to run without me giving it a specific address.
def get_lat_long(address):
try:
x = geolocator.geocode(address)
return x.latitude, x.longitude
except:
return np.nan, np.nan
df[['latitude', 'longitude']] = df.apply(lambda x: get_lat_long(x.address), axis=1, result_type='expand')
print(df)
Output:
address latitude longitude
0 33000 N KINGSHIGHWAY BLVD, St.Louis 38.649933 -90.263803
This is untested, because I don't want to install the package, but this is the kind of thing you need:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="My App")
lat, lon = [], []
for addr in df2['address'].tolist():
location = geolocator.geocode(addr)
lat.append( location.latitude )
lon.append( location.longitude )
df2['latitude'] = np.array(lat)
df2['longitude'] = np.array(lon)
Or:
locs = [geolocator.geocode(addr) for addr in df2['address'].tolist()]
locs = np.array(locs)
df2['latitide'] = locs[:,0]
df2['longitude'] = locs[:,1]
I'm trying to make a map with the number of noise complaints for each zipcode and everything runs fine, but I can't get the count number to appear on the map when I hover over each area. I tried making it into an int as the error suggested, but nothing seems to work.
import pandas as pd
df2020 = pd.read_csv('/Users/kenia/Desktop/CSCI 233 Seminar Project/311_Noise_Complaints.csv',sep=',', low_memory = False)
df2020=df2020[df2020['Created Date'].str[6:10] == '2020']
df2020['Incident Zip'].fillna(0, inplace=True)
df2020['Incident Zip'] = df2020['Incident Zip'].astype(int)
df2020_zip = df2020['Incident Zip'].value_counts().to_frame().reset_index()
df2020_zip.columns = ['postal_code', 'counts']
df2020_zip['postal_code'] = df2020_zip['postal_code'].astype(str)
df2020_zip['counts'] = df2020_zip['counts'].astype(int)
import folium
nycMap = folium.Map(location=[40.693943, -73.985880], zoom_start=10)
zipLines = '/Users/kenia/Desktop/CSCI 233 Seminar Project/zipMap.geojson.json'
df2020_zip['counts'] = df2020_zip['counts'].astype(int)
df2020_zip['counts'] = pd.Series(zipLines['counts'])
count_col = df2020_zip['counts']
bins = list(df2020_zip['counts'].quantile([0,0.2,0.4,0.6,0.8,1]))
choropleth = folium.Choropleth(geo_data = zipLines,
data=df2020_zip,
columns=['postal_code', 'counts'],
key_on='feature.properties.postalCode',
fill_color='OrRd',
fill_opacity=0.7,
line_opacity=1.0,
bins = bins,
highlight=True,
legend_name="Noise Frequency in 2020"
).add_to(nycMap)
folium.LayerControl().add_to(nycMap)
choropleth.geojson.add_child(
folium.features.GeoJsonTooltip(['postalCode','PO_NAME','count_col'])
)
nycMap.save(outfile='index.html')
Error:
Traceback (most recent call last):
File "/Users/kenia/Desktop/throwaway.py", line 20, in <module>
df2020_zip['counts'] = pd.Series(zipLines['counts'])
TypeError: string indices must be integers
Dataset: https://data.cityofnewyork.us/Social-Services/311-Noise-Complaints/p5f6-bkga
Zipcode GeoJson: https://data.beta.nyc/dataset/nyc-zip-code-tabulation-areas/resource/6df127b1-6d04-4bb7-b983-07402a2c3f90?view_id=b34c6552-9fdb-4f95-8810-0588ad1a4cc8
I'm trying to create a choropleth map using folium on python and I was able to get the base map running, but when I try to add a layer with neighborhood boundaries, it does not show up on the html page. I thought maybe I had to increase the line opacity, but that doesn't seem to be it.
This is my code:
import folium
import pandas as pd
crimeData = pd.read_csv('NYC_crime.csv')
crime2020 = crimeData[crimeData.CMPLNT_FR_DT == 2020]
nycMap = folium.Map(location=[40.693943, -73.985880],zoom_start = 10)
mapLines = 'nbhdMap.geojson.json'
folium.Choropleth(geo_data = mapLines,
data = crime2020,
fill_color = 'OrRd',
fill_opacity=0.5,
line_opacity=1.0,
key_on = 'feature.geometry.coordinates',
columns = ['Lat_Lon']
)
nycMap.save(outfile='index.html')
I'm also having trouble filling the map with data. I'm trying to make it so that each complaint documented on the CSV file from 2020 is used to show which areas received the most calls. But I get this error:
Traceback (most recent call last):
File "/Users/kenia/Desktop/CSCI233/PRAC.py", line 10, in <module>
folium.Choropleth(geo_data = mapLines,
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/folium/features.py", line 1158, in __init__
color_data = data.set_index(columns[0])[columns[1]].to_dict()
IndexError: list index out of range
This is the neighborhood boundaries: https://data.beta.nyc/dataset/pediacities-nyc-neighborhoods/resource/35dd04fb-81b3-479b-a074-a27a37888ce7
And this is my data: https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243
[EDIT] So I tried #r-beginners suggestion with a simpler dataset: https://data.cityofnewyork.us/Health/Restaurants-rolled-up-/59dk-tdhz
import pandas as pd
import folium
data = pd.read_csv('nycrestaurants.csv')
data = pd.concat([data, str(data['ZIPCODE']).split(',')], axis=1)
data.columns = ['CAMIS', 'DBA', 'BORO', 'BUILDING', 'STREET', 'ZIPCODE']
resData = data.groupby(['ZIPCODE'])['DBA'].sum().reset_index()
nycMap = folium.Map(location=[40.693943, -73.985880],zoom_start = 10)
mapLines = 'zipMap.geojson.json'
folium.Choropleth(geo_data = mapLines,
data = resData,
key_on = 'feature.properties.postalCode',
columns = ['ZIPCODE', 'DBA'],
fill_color = 'OrRd',
fill_opacity=0.5,
line_opacity=1.0
).add_to(nycMap)
nycMap.save(outfile='index.html')
But now I'm getting this error message:
Traceback (most recent call last):
File "/Users/kenia/Desktop/CSCI233/PRAC.py", line 5, in <module>
data = pd.concat([data, str(data['ZIPCODE']).split(',')], axis=1)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/reshape/concat.py", line 274, in concat
op = _Concatenator(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/core/reshape/concat.py", line 359, in __init__
raise TypeError(msg)
TypeError: cannot concatenate object of type '<class 'list'>'; only Series and DataFrame objs are valid
Since you were presented with the data of the complaint in another question, you got the GEOJSON data from here for the corresponding zip code range. As for the process, we have tabulated it by the number of zip codes and tied it to the number of occurrences.
import pandas as pd
import numpy as np
df = pd.read_csv('./data/311_Noise_Complaints.csv', sep=',')
df['Incident Zip'].fillna(0, inplace=True)
df['Incident Zip'] = df['Incident Zip'].astype(int)
df_zip = df['Incident Zip'].value_counts().to_frame().reset_index()
df_zip.columns = ['postal_code', 'counts']
df_zip['postal_code'] = df_zip['postal_code'].astype(str)
import folium
nycMap = folium.Map(location=[40.693943, -73.985880], zoom_start=10)
mapLines = './data/nyc_zip_code_tabulation_areas_polygons.geojson'
choropleth = folium.Choropleth(geo_data = mapLines,
data = df_zip,
columns = ['postal_code', 'counts'],
key_on = 'feature.properties.postalcode',
fill_color = 'BuPu',
fill_opacity=0.5,
line_opacity=1.0
).add_to(nycMap)
choropleth.geojson.add_child(
folium.features.GeoJsonTooltip(['po_name'], labels=False)
)
nycMap.save(outfile='index.html')
nycMap
I have a netcdf file with global wind data and I need to extract the component of wind UGRD& VGRD for my study area (lonmin=-2, lonmax=8, latmin=35 latmax=39). I need to have a text file with this format:
time series UGRD
VGRD
Example
19790101060000 (year month day hours)
3.28 5.26 (UGRD)
2.23 2.225 (VGRD)
I tried to do this operation with python. I succeeded to extract my study area in nc file but, I still tried to convert it to a text file but I've failed. Can someone help me to do it, please?
import numpy as np
import netCDF4
import netCDF4 as nc
import pandas as pd
import numpy as np
import csv
#### === User-inputs ====#####
one = nc.Dataset('1979.nc') ##load one of your nc datafiles
print one.variables ## Check variables names, say my variable names are lat, lon, pre
## Name of the variables
lat_name = 'latitude'
lon_name = 'longitude'
time_name = 'time'
data_name1 = 'UGRD_10maboveground'
data_name2 = 'VGRD_10maboveground'
## Select spatial range for which data to be extracted
mylat1 = 35
mylat2 = 39
mylon1 = -2
mylon2 = 8
##Give a name of your extracted datafile and define units
newfilename = 'Extracted_Data'
time_unit = 'day since 1979-01-01 00:00'
lat_unit = 'degrees_south'
lon_unit = 'degrees_east'
data_unit = 'm/s'
#### ======= Rest of the Code is Automated ========######
##Find pixel-range based on the provided lat-lon
lat = one.variables[lat_name][:]
lon = one.variables[lon_name][:]
ver_pix = []
for i in xrange(0, len(lat)):
if lat[i] >= mylat1 and lat[i] <= mylat2:
ver_pix.append(i)
y_min = min(ver_pix)
y_max = max(ver_pix)
print lat[min(ver_pix):max(ver_pix)]
hor_pix = []
for j in xrange(0,len(lon)):
if lon[j] >= mylon1 and lon[j] <= mylon2:
hor_pix.append(j)
x_min = min(hor_pix)
x_max = max(hor_pix)
print lon[min(hor_pix):max(hor_pix)]
check_range1 = one.variables[data_name1][:,y_min:y_max,x_min:x_max] ##pre:lat:lon =
time,y,x
check_range2 = one.variables[data_name2][:,y_min:y_max,x_min:x_max]
#print check_range
print check_range1.shape
print check_range2.shape
## Load all nc files in the directory from which data to be extracted
## ..for the selected area
f = nc.MFDataset('1979.nc')
alldata = f.variables[data_name1][:,y_min:y_max,x_min:x_max]
alldata = f.variables[data_name2][:,y_min:y_max,x_min:x_max]
lat1 = one.variables[lat_name][y_min:y_max]
lon1 = one.variables[lon_name][x_min:x_max]
#time = one.variables[time_name][:]
ncfile = nc.Dataset(''+str(newfilename)+'.nc','w')
ncfile.createDimension(time_name,len(alldata))
ncfile.createDimension(lat_name,len(lat1))
ncfile.createDimension(lon_name,len(lon1))
time = ncfile.createVariable(time_name,np.dtype('float32').char,(time_name,))
lats = ncfile.createVariable(lat_name,np.dtype('float32').char,(lat_name,))
lons = ncfile.createVariable(lon_name,np.dtype('float32').char,(lon_name,))
time.units = time_unit
lats.units = lat_unit
lons.units = lon_unit
time[:] = np.linspace(1,len(alldata),len(alldata))
lats[:] = lat1
lons[:] = lon1
newdata1 = ncfile.createVariable(data_name1,np.dtype('float32').char,
(time_name,lat_name,lon_name))
newdata2 = ncfile.createVariable(data_name2,np.dtype('float32').char,
(time_name,lat_name,lon_name))
newdata1.units = data_unit
newdata2.units = data_unit
newdata1[:] = alldata[:]
newdata2[:] = alldata[:]
dtime=netCDF4.num2date(time[:],time.units)
UGRD_ts=pd.Series(data_name2,index=dtime)
UGRD_ts.to_csv('data1.csv', index=True, header=True)
Task: to write a function for changing the timeframe on encodings.
Body of the program:
import pandas as pd
import numpy as np
from future_functions import *
# Load CSV Data
data = pd.read_csv('Data/EURUSDHours.csv')
data.columns = ['Date','open','high','low','close','AskVol']
data = data.set_index(pd.to_datetime(data.Date))
data = data[['open','high','low','close','AskVol']]
prices = data.drop_duplicates(keep=False)
hkaprices = prices.copy()
hkaprices['Symbol'] = 'SYMB'
HKA = OHLCresample(hkaprices,'15H')
, where future_functions is an attached file with a function.
The function itself:
def OHLCresample(DataFrame,TimeFrame,column='ask'):
grouped = DataFrame.groupby('Symbol')
if np.any(DataFrame.columns == 'Ask'):
if column == 'ask':
ask = grouped['Ask'].resample(TimeFrame).ohlc()
askVol = grouped['AskVol'].resample(TimeFrame).count()
resampled = pd.DataFrame(ask)
resampled['AskVol'] = askVol
elif column == 'bid':
bid = grouped['Bid'].resample(TimeFrame).ohlc()
bidVol = grouped['BidVol'].resample(TimeFrame).count()
resampled = pd.DataFrame(bid)
resampled['BidVol'] = bidVol
else:
raise ValueError('Column must be a string. Either ask or bid')
elif np.any(DataFrame.columns == 'close'):
open = grouped['open'].resample(TimeFrame).ohlc()
close = grouped['close'].resample(TimeFrame).ohlc()
high = grouped['high'].resample(TimeFrame).ohlc()
low = grouped['low'].resample(TimeFrame).ohlc()
askVol = grouped['AskVol'].resample(TimeFrame).ohlc()
resampled = pd.DataFrame(open)
resampled['high'] = high
resampled['low'] = low
resampled['close'] = close
resampled['AskVol'] = askVol
resampled = resampled.dropna()
return resampled
I receive an error:
KeyError: 'AskVol'
ValueError: Wrong number of items passed 4, placement implies 1
The data set can be taken following the link:
https://nofile.io/f/Q9AKjGbSUHd/EURUSDHours.csv
I understand that the problem is in dimensionality, but I do not know how to resolve it.