I am trying to handle data in Python using pandas. I have this code:
import folium
import pandas
# Build the base map centred on the given [lat, lon] and a feature group to hold the markers.
mapp = folium.Map(location=[19.997454,73.789803], zoom_start=6, tiles="Stamen Terrain" )
fg = folium.FeatureGroup(name="my map")
# Load the table; the code below reads its "LAT" and "LON" columns.
df=pandas.read_csv("volcanoes.txt")
# NOTE(review): this concatenation produces a Series of *strings* such as
# '[48.7767982,-121.810997]', not pairs of numbers.
cordinates="[" + df["LAT"].astype(str) + "," + df["LON"].astype(str) +"]"
for i in cordinates:
# Each `i` is one of those strings; the traceback below shows folium's
# validate_location rejecting it ("Expected two (lat, lon) values").
fg.add_child(folium.Marker(location=i,popup="hey jayesh , welcome to Nashik",icon=folium.Icon(color="green")))
mapp.add_child(fg)
# Write the finished map to an HTML file.
mapp.save("jay1.html")
> Windows PowerShell Copyright (C) Microsoft Corporation. All rights
> reserved.
>
> Try the new cross-platform PowerShell https://aka.ms/pscore6
>
> PS C:\Users\DELL\OneDrive\Desktop\python\volcano> &
> C:/Users/DELL/AppData/Local/Programs/Python/Python39/python.exe
> c:/Users/DELL/OneDrive/Desktop/python/volcano/jayesh.py Traceback
> (most recent call last): File
> "c:\Users\DELL\OneDrive\Desktop\python\volcano\jayesh.py", line 10, in
> <module>
> fg.add_child(folium.Marker(location=i,popup="hey jayesh , welcome to Nashik",icon=folium.Icon(color="green"))) File
> "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-packages\folium\map.py",
> line 277, in __init__
> self.location = validate_location(location) if location else None File
> "C:\Users\DELL\AppData\Local\Programs\Python\Python39\lib\site-packages\folium\utilities.py",
> line 50, in validate_location
> raise ValueError('Expected two (lat, lon) values for location, ' ValueError: Expected two (lat, lon) values for location, instead got:
> '[48.7767982,-121.810997]'. PS
> C:\Users\DELL\OneDrive\Desktop\python\volcano>
The problem is with this line:
cordinates="[" + df["LAT"].astype(str) + "," + df["LON"].astype(str) +"]"
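You are building strings such as '[48.7767982,-121.810997]' and passing each string in as the location, whereas folium expects two separate (lat, lon) values.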
Try replacing that line with:
cordinates = [(lat, lon) for lat, lon in zip(df["LAT"],df["LON"])]
This will generate a list of (lat, lon) tuples, which should work. I also don't think you need to cast the values to str.
**It works for me like this:**
import folium
import pandas
# Base map plus a feature group that collects one marker per row of the CSV.
mapp = folium.Map(
    location=[19.997454, 73.789803],
    zoom_start=6,
    tiles="Stamen Terrain",
)
fg = folium.FeatureGroup(name="my map")

# Pull the two coordinate columns out of the CSV as plain Python lists.
df = pandas.read_csv("volcanoes.txt")
lat = df["LAT"].tolist()
lon = df["LON"].tolist()

# One green marker per (latitude, longitude) pair, passed as a two-element list.
for i, j in zip(lat, lon):
    marker = folium.Marker(
        location=[i, j],
        popup="volcanoes",
        icon=folium.Icon(color="green"),
    )
    fg.add_child(marker)

mapp.add_child(fg)
mapp.save("volcanoes.html")
Related
I would like to use the "recherche_distance" function to calculate distances with Neo4j.
I use a Cypher query and I get an error:
nosql: <V2_NoSqlDataAccess.NoSqlDataAccess object at 0x000001A283C95BE0>
<class 'float'>
<class 'float'>
lat = 45.438, longi = 4.3862
Traceback (most recent call last):
File "c:\Datascientest\neo4j\test_projet\test.py", line 9, in <module>
result = nsql.recherche_distance(lat, longi)
File "c:\Datascientest\neo4j\test_projet\V2_NoSqlDataAccess.py", line 49, in recherche_distance
cypher_query = '''
KeyError: 'x '
This is my code :
from neo4j import GraphDatabase, basic_auth
import pandas as pd
import math
class Point:
    """Plain record that stores its four constructor arguments unchanged.

    The values are exposed as the attributes ``longitude``, ``latitude``,
    ``lat`` and ``longi``.
    """

    def __init__(self, longitude, latitude, lat, longi):
        self.longitude, self.latitude = longitude, latitude
        self.lat, self.longi = lat, longi
# Wrapper around a Neo4j driver: inserts "Localisation" nodes and runs a
# point-distance query against "Info" nodes.
class NoSqlDataAccess:
def __init__(self):
# Open a Bolt driver against the local instance using basic authentication.
self.driver = GraphDatabase.driver('bolt://localhost:11003',auth=basic_auth("neo4j", "xxxx"))
def close(self):
# Release the driver.
self.driver.close()
# NOTE(review): declared inside the class without `self`; the first parameter
# is `tx`, on which the query is run with bound $parameters. Presumably meant
# to receive a transaction object - confirm how callers invoke it.
def create_localisation_line(tx,type, latitude, longitude, lieu,code_insee,id_datatourisme):
query = "CREATE (n:Localisation {type: $type ,latitude: $latitude,longitude: $longitude, lieu: $lieu,code_insee: $code_insee,id_datatourisme: $id_datatourisme}) RETURN id(n) AS node_id"
tx.run(query,type = type,latitude = latitude, longitude = longitude, lieu = lieu,code_insee=code_insee,id_datatourisme=id_datatourisme)
# Sends one CREATE statement per row of `datagraph` (iterated with iterrows()).
def add_localisations(self,datagraph):
with self.driver.session() as session:
for i, row in datagraph.iterrows():
#if i==100000:
# break
#else:
type = row['type']
latitude = row['latitude']
longitude = row['longitude']
lieu = row['adresse']
code_insee=row['code_insee']
id_datatourisme=row['id']
# NOTE(review): the row values are spliced into the Cypher text with str() and
# no quoting, unlike create_localisation_line above, which binds $parameters.
query = "CREATE (n:Localisation {type:"+str(row['type'])+",latitude:"+str(latitude)+",longitude: "+str(longitude)+", lieu: "+str(lieu)+",code_insee: "+str(code_insee)+",id_datatourisme:"+str(id_datatourisme)+"})"
print(query)
session.run(query)
# Close the driver.
self.close()
# Runs a distance query for the point (lat, longi) and returns result.fetch(5).
def recherche_distance(self, lat, longi):
with self.driver.session() as session:
# NOTE(review): str.format() is applied to the whole template below, so the
# Cypher map braces `{x : ...}` are parsed as replacement fields. This is the
# source of the KeyError: 'x ' shown in the traceback above.
cypher_query = '''
MATCH (s1:Info)
WITH point({x : toFloat(s1.latitude), y : toFloat(s1.longitude)}) AS p1, point({x:toFloat({lat}), y:toFloat({longi})}) AS p2, s1
RETURN point.distance(p1,p2) AS Distance, s1.lieu AS Lieu ORDER BY Distance
'''.format(lat=lat, longi=longi)
print(f'Requête Cypher : {cypher_query}')
result = session.run(cypher_query)
print(f'Résultat de la requête : {result}')
return result.fetch(5)
# NOTE(review): this statement follows the `return` above; if it sits at the
# same indentation level it can never execute.
self.close()
This is my second file, which I use to run the query:
`
from V2_NoSqlDataAccess import NoSqlDataAccess
# Build the data-access object and show its repr.
nsql = NoSqlDataAccess()
print('nosql:', nsql)

# Query point; the type of each value is printed for debugging.
lat, longi = 45.4380, 4.3862
print(type(lat))
print(type(longi))
print(f'lat = {lat}, longi = {longi}')

# Run the distance query and print whatever it returns.
result = nsql.recherche_distance(lat, longi)
print(result)
I think I have a problem with the variables "lat" and "longi", but I couldn't find the solution.
I don't get the error if I write the lat and longi values into the query manually.
Thank you for your help :-)
You need to escape the literal braces in the query. Because the string goes through str.format, write {{ and }} to produce a literal { and }, respectively.
OLD:
WITH point({x : toFloat(s1.latitude), y : toFloat(s1.longitude)}) AS p1, point({x:toFloat({lat}), y:toFloat({longi})}) AS p2, s1
NEW:
WITH point({{x: toFloat(s1.latitude), y: toFloat(s1.longitude)}}) AS p1, point({{x: toFloat({lat}), y: toFloat({longi})}}) AS p2, s1
This is because Python's str.format treats `{x : ...}` as a replacement field and looks for an argument named 'x ' (hence KeyError: 'x '), whereas those braces are actually part of the Cypher query.
This is the original repo I'm trying to run on my computer: https://github.com/kreamkorokke/cs244-final-project
import os
import matplotlib.pyplot as plt
import argparse
from attacker import check_attack_type
IMG_DIR = "./plots"
def read_lines(f, d):
    """Parse a sequence-number log from *f* and append its samples to *d*.

    Each line is split on commas into ``type,time,num``. ``type`` must be
    ``seq`` or ``ack``; ``time`` and ``num`` are converted to float and
    appended to ``d[type]['time']`` and ``d[type]['num']``.

    The last line returned by ``f.readlines()`` is skipped, as in the
    original code (NOTE(review): presumably a trailing non-data line -
    confirm against the log writer).

    Args:
        f: Readable text file object.
        d: Dict of the form
            ``{'seq': {'time': [], 'num': []}, 'ack': {'time': [], 'num': []}}``;
            mutated in place.

    Raises:
        ValueError: If a line's type field is neither ``seq`` nor ``ack``.
    """
    lines = f.readlines()[:-1]
    for line in lines:
        typ, time, num = line.split(',')
        if typ not in ('seq', 'ack'):
            # Raising a bare string is itself a TypeError on Python 3, which
            # hid this message; raise a real exception instead.
            raise ValueError(
                "Unknown type read while parsing log file: %s" % typ)
        d[typ]['time'].append(float(time))
        d[typ]['num'].append(float(num))
# Entry point: parse the command-line options, load the normal and attack
# logs through read_lines(), and plot sequence/ACK numbers against time.
def main():
parser = argparse.ArgumentParser(description="Plot script for plotting sequence numbers.")
parser.add_argument('--save', dest='save_imgs', action='store_true',
help="Set this to true to save images under specified output directory.")
parser.add_argument('--attack', dest='attack',
nargs='?', const="", type=check_attack_type,
help="Attack name (used in plot names).")
parser.add_argument('--output', dest='output_dir', default=IMG_DIR,
help="Directory to store plots.")
args = parser.parse_args()
save_imgs = args.save_imgs
output_dir = args.output_dir
attack_name = args.attack
# Saving is refused unless the attack name is one of the three known values.
if save_imgs and attack_name not in ['div', 'dup', 'opt'] :
print("Attack name needed for saving plot figures.")
return
# Accumulators filled in by read_lines(): one time list and one num list per type.
normal_log = {'seq':{'time':[], 'num':[]}, 'ack':{'time':[], 'num':[]}}
attack_log = {'seq':{'time':[], 'num':[]}, 'ack':{'time':[], 'num':[]}}
# NOTE(review): both files are opened without `with` and are closed manually
# by the close() calls further down.
normal_f = open('log.txt', 'r')
attack_f = open('%s_attack_log.txt' % attack_name, 'r')
read_lines(normal_f, normal_log)
read_lines(attack_f, attack_log)
# Human-readable attack name used in the legend labels.
if attack_name == 'div':
attack_desc = 'ACK Division'
elif attack_name == 'dup':
attack_desc = 'DupACK Spoofing'
elif attack_name == 'opt':
attack_desc = 'Optimistic ACKing'
else:
# NOTE(review): raising a plain string is not valid in Python 3 (exceptions
# must derive from BaseException), so this line would itself raise TypeError.
raise 'Unknown attack type: %s' % attack_name
norm_seq_time, norm_seq_num = normal_log['seq']['time'], normal_log['seq']['num']
norm_ack_time, norm_ack_num = normal_log['ack']['time'], normal_log['ack']['num']
atck_seq_time, atck_seq_num = attack_log['seq']['time'], attack_log['seq']['num']
atck_ack_time, atck_ack_num = attack_log['ack']['time'], attack_log['ack']['num']
plt.plot(norm_seq_time, norm_seq_num, 'b^', label='Regular TCP Data Segments')
plt.plot(norm_ack_time, norm_ack_num, 'bx', label='Regular TCP ACKs')
plt.plot(atck_seq_time, atck_seq_num, 'rs', label='%s Attack Data Segments' % attack_desc)
plt.plot(atck_ack_time, atck_ack_num, 'r+', label='%s Attack ACKs' % attack_desc)
plt.legend(loc='upper left')
# NOTE(review): max() applied to two lists returns one of the lists, so x and
# y end up as lists rather than numbers. This matches the TypeError ('>' not
# supported between 'int' and 'list') in the traceback below.
x = max(max(norm_seq_time, norm_ack_time),max(atck_seq_time, atck_ack_time))
y = max(max(norm_seq_num, norm_ack_num),max(atck_seq_num, atck_ack_num))
plt.xlim(0, x)
plt.ylim(0,y)
plt.xlabel('Time (s)')
plt.ylabel('Sequence Number (Bytes)')
if save_imgs:
# Save images to figure/
if not os.path.exists(output_dir):
os.makedirs(output_dir)
plt.savefig(output_dir + "/" + attack_name)
else:
plt.show()
normal_f.close()
attack_f.close()
# Run main() only when the file is executed as a script.
if __name__ == "__main__":
main()
After running this I get this error:
Traceback (most recent call last):
File "plot.py", line 85, in <module>
main()
File "plot.py", line 66, in main
plt.xlim(0, a)
File "/usr/lib/python3/dist-packages/matplotlib/pyplot.py", line 1427, in xlim
ret = ax.set_xlim(*args, **kwargs)
File "/usr/lib/python3/dist-packages/matplotlib/axes/_base.py", line 3267, in set_xlim
reverse = left > right
TypeError: '>' not supported between instances of 'int' and 'list'
Done! Please check ./plots for all generated plots.
How can I solve this problem? Or, better yet, is there another way of running this project? I installed matplotlib with the `pip3 install matplotlib` command (same with scapy). My main Python version is Python 2 right now, but I run the project with Python 3; could that be the issue? What am I missing? Or is it about Mininet itself?
The problem is in this line
x = max(max(norm_seq_time, norm_ack_time),max(atck_seq_time, atck_ack_time))
IIUC, you want to assign to x the maximum value among all four lists. However, when you pass two lists to the max function, such as max(norm_seq_time, norm_ack_time), it compares the lists themselves (element by element) and returns whichever list it considers greater, not the highest value across both lists.
Instead, you can do something like:
x = max(norm_seq_time + norm_ack_time + atck_seq_time + atck_ack_time)
This will concatenate the four lists into a single one. Then, the function will return the highest value among all of them. You might wanna do that to the calculation of y as well.
If this is not what you wanted, or if you have any further issues, please let us know.
With the help of a friend, we solved this problem by changing that part of the code to this:
# Largest timestamp in each series; an empty series contributes 0.
max_norm_seq_time = max(norm_seq_time, default=0)
max_norm_ack_time = max(norm_ack_time, default=0)
max_atck_seq_time = max(atck_seq_time, default=0)
max_atck_ack_time = max(atck_ack_time, default=0)
x = max(
    max_norm_seq_time,
    max_norm_ack_time,
    max_atck_seq_time,
    max_atck_ack_time,
)
plt.xlim([0, x])

# Same treatment for the sequence numbers on the y axis.
max_norm_seq_num = max(norm_seq_num, default=0)
max_norm_ack_num = max(norm_ack_num, default=0)
max_atck_seq_num = max(atck_seq_num, default=0)
max_atck_ack_num = max(atck_ack_num, default=0)
y_upper = max(
    max_norm_seq_num,
    max_norm_ack_num,
    max_atck_seq_num,
    max_atck_ack_num,
)
plt.ylim([0, y_upper])
```
Writing it here just in case anyone else needs it.
I'm trying to translate part of the SQuAD 1.1 dataset into Sinhalese. I don't know whether I can feed the JSON file straight into the translation.
What I have tried so far is building a small DataFrame from the SQuAD dataset and translating that as a demo for myself, but I got various errors. Below is the error I'm getting now. Can you help me fix it, or tell me a better way to complete my task using Python?
```
import googletrans
from googletrans import Translator
import os
from google.cloud import translate_v2 as translate
# Point the GOOGLE_APPLICATION_CREDENTIALS environment variable at a key file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r"C:\Users\Sathsara\Documents\Python Learning\Translation test\translationAPI\flash-medley-278816-b2012b874797.json"
# create a translator object
translator = Translator()
# use translate method to translate a string - by default, the destination language is english
translated = translator.translate('I am Sathsara Rasantha',dest='si')
# the translate method returns an object
print(translated)
# obtain translated string by using attribute .text
translated.text
import pandas as pd
# Load the JSON sample into a DataFrame.
translate_example = pd.read_json("example2.json")
translate_example
contexts = []
questions = []
answers_text = []
answers_start = []
# Walk topics -> paragraphs -> question/answer entries, collecting the
# question, the first answer's start offset and text, and the paragraph context.
for i in range(translate_example.shape[0]):
topic = translate_example.iloc[i,0]['paragraphs']
for sub_para in topic:
for q_a in sub_para['qas']:
questions.append(q_a['question'])
answers_start.append(q_a['answers'][0]['answer_start'])
answers_text.append(q_a['answers'][0]['text'])
contexts.append(sub_para['context'])
df = pd.DataFrame({"context":contexts, "question": questions, "answer_start": answers_start, "text": answers_text})
df
# Keep only the rows labelled 0 through 2 for the demo.
df=df.loc[0:2,:]
df
# make a deep copy of the data frame
df_si = df.copy()
# translate columns' name using rename function
# NOTE(review): no `dest` is passed here, unlike the dest='si' calls elsewhere
# in this script.
df_si.rename(columns=lambda x: translator.translate(x).text, inplace=True)
df_si.columns
translations = {}
for column in df_si.columns:
# unique elements of the column
unique_elements = df_si[column].unique()
for element in unique_elements:
# add translation to the dictionary
# NOTE(review): every column is translated, whatever type its values have. The
# traceback below ("'numpy.int64' object is not iterable") is raised from this
# call, so a non-string value is being passed - presumably from the
# answer_start column; confirm.
translations[element] = translator.translate(element,dest='si').text
print(translations)
# modify all the terms of the data frame by using the previously created dictionary
df_si.replace(translations, inplace = True)
# check translation
df_si.head()
```
This is the error I get:
> --------------------------------------------------------------------------- TypeError Traceback (most recent call
> last) <ipython-input-24-f55a5ca59c36> in <module>
> 5 for element in unique_elements:
> 6 # add translation to the dictionary
> ----> 7 translations[element] = translator.translate(element,dest='si').text
> 8
> 9 print(translations)
>
> ~\Anaconda3\lib\site-packages\googletrans\client.py in translate(self,
> text, dest, src)
> 170
> 171 origin = text
> --> 172 data = self._translate(text, dest, src)
> 173
> 174 # this code will be updated when the format is changed.
>
> ~\Anaconda3\lib\site-packages\googletrans\client.py in
> _translate(self, text, dest, src)
> 73 text = text.decode('utf-8')
> 74
> ---> 75 token = self.token_acquirer.do(text)
> 76 params = utils.build_params(query=text, src=src, dest=dest,
> 77 token=token)
>
> ~\Anaconda3\lib\site-packages\googletrans\gtoken.py in do(self, text)
> 199 def do(self, text):
> 200 self._update()
> --> 201 tk = self.acquire(text)
> 202 return tk
>
> ~\Anaconda3\lib\site-packages\googletrans\gtoken.py in acquire(self,
> text)
> 144 a = []
> 145 # Convert text to ints
> --> 146 for i in text:
> 147 val = ord(i)
> 148 if val < 0x10000:
>
> TypeError: 'numpy.int64' object is not iterable
I have the code below, but it ends with an error that seems uncommon, and I can't find any information that helps me understand what is wrong.
TypeError: list indices must be integers, not _ElementStringResult
My code is as below:
from lxml import html
import requests
import csv
import pandas as pd
import numpy as np
# NOTE: this script uses Python 2 syntax (print statements).
StockData = ['KTC','DANCO','PTRANS']
Stocklen = len(StockData)
df_Year = pd.DataFrame(columns=['Stock','Year'])
df_ROE = pd.DataFrame(columns=['Stock','ROE'])
# `x` is the integer index into StockData for the current iteration.
for x in range (len(StockData)):
print "############### "
print StockData [x]
print "###############"
# Fetch and parse the income-statement and balance-sheet pages for this stock.
page_wsj2 = requests.get('http://abc/'+StockData[x]+'/financials/quartre/income-statement')
wsj2 = html.fromstring(page_wsj2.content)
page_wsj4 = requests.get('http://abc/'+StockData[x]+'/financials/quartre/balance-sheet')
wsj4 = html.fromstring(page_wsj4.content)
Year = 2016
df_Year.loc[len(df_Year)] = [StockData[x],Year]
NI4Q = wsj2.xpath('//tr[.="Net Income"]/following-sibling::td/text()')
# NOTE(review): this comprehension reuses the name `x`. In Python 2 a
# list-comprehension variable is not local to the comprehension, so after this
# line `x` holds the last element of NI4Q instead of the loop index. That
# matches the reported "list indices must be integers, not
# _ElementStringResult" error on the StockData[x] lookup further down.
NI4Q = [x.replace(",", "") for x in NI4Q]
# Rewrite parenthesised values such as "(710.0)" as negative numbers.
NI4Q = ['-' + y.strip('()') if '(' in y else y for y in NI4Q]
print NI4Q
# Sum the first four entries of the list.
NI = float(NI4Q[0])+float(NI4Q[1])+float(NI4Q[2])+float(NI4Q[3])
print NI
Equity = wsj4.xpath('//td[.="Total Equity"]/following-sibling::td/text()')[0]
#NI = NI.replace(',','') #float object has no attribute 'replace'
Equity = Equity.replace(',','')
print NI
print Equity
Equity = float(Equity)
wsj_calROE = (NI/Equity)*100
ROE = wsj_calROE
print "ROE"
print ROE
# Append a [stock, ROE formatted to two decimals] row to df_ROE.
df_ROE.loc[len(df_ROE)] = [StockData[x],"%.2f"%ROE]
From the output, the ROE calculation is correct up to the line df_ROE.loc[len(df_ROE)] = [StockData[x],"%.2f"%ROE]. This is rather weird: ROE is a float at that point. What format should I use to make it work correctly?
###############
KTC
###############
['-710.0', '716.0', '913.0', '246.0', '1041.0', ' ', ' ']
1165.0
1165.0
93714.0
wow
1165.0
ROE
1.24314403398
Traceback (most recent call last): File "/home/tyua/marketdata.py", line 84, in <module> df_ROE.loc[len(df_ROE)] = [StockData[x],ROE]TypeError: list indices must be integers, not _ElementStringResult
I got the error below while running my Python script with pandas on a dataset of 30 million records. Please advise what went wrong.
Traceback (most recent call last): File "extractyooochoose2.py", line 32, in totalitems=[len(x) for x in clicksdat.groupby('Sid')['itemid'].unique()]
File "", line 13, in unique
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/pandas/core/groupby.py", line 620, in wrapper
raise ValueError
Data and code as shown below
import pandas as pd
import datetime as dt
# Input files: click events and buy events.
clickspath='/tmp/gensim/yoochoose/yoochoose-clicks.dat'
buyspath='/tmp/gensim/yoochoose/yoochoose-buys.dat'
# Read the clicks file without a header row; column names are assigned on the
# next line.
# NOTE(review): the dtype mapping is keyed by column names that are only
# assigned after the read - verify that these dtypes actually take effect.
clicksdat=pd.read_csv(clickspath,header=None,dtype={'itemid': pd.np.str_,'Sid':pd.np.str_,'Timestamp':pd.np.str_,'itemcategory':pd.np.str_})
clicksdat.columns=['Sid','Timestamp','itemid','itemcategory']
buysdat=pd.read_csv(buyspath,header=None)
buysdat.columns=['Sid','Timestamp','itemid','price','qty']
# Map every hour 0-23 to a segment label.
segment={}
for i in range(24):
if i<7:
segment[i]='EM'
elif i<10:
segment[i]='M'
elif i<13:
segment[i]='A'
elif i<18:
segment[i]='E'
elif i<23:
segment[i]='N'
elif i<25:
segment[i]='MN'
#*******************************************
# Distinct session ids seen in the buys and in the clicks.
buyersession=buysdat.Sid.unique()
clickersession=clicksdat.Sid.unique()
# Per-session max and min of the Timestamp column, each parsed with strptime.
maxtemp=[(dt.datetime.strptime(x,"%Y-%m-%dT%H:%M:%S.%fZ")) for x in clicksdat.groupby('Sid')['Timestamp'].max()]
mintemp=[dt.datetime.strptime(x,"%Y-%m-%dT%H:%M:%S.%fZ") for x in clicksdat.groupby('Sid')['Timestamp'].min()]
# Session length in whole seconds, plus day and month of the max timestamp.
duration=[int((a-b).total_seconds()) for a,b in zip(maxtemp,mintemp)]
day=[x.day for x in maxtemp]
month=[x.month for x in maxtemp]
# Timestamp count per session; the list wrapper is undone immediately by [0].
noofnavigations=[clicksdat.groupby('Sid').count().Timestamp][0]
# Number of distinct itemid values per session. The traceback quoted above is
# raised from this line.
totalitems=[len(x) for x in clicksdat.groupby('Sid')['itemid'].unique()]
totalcats=[len(x) for x in clicksdat.groupby('Sid')['itemcategory'].unique()]
# Segment label of the max timestamp, and whether it differs from the min's.
timesegment= [segment[x.hour]for x in maxtemp]
segmentchange=[1 if (segment[x.hour]!=segment[y.hour]) else 0 for x,y in zip(maxtemp,mintemp)]
# Flag each session id (taken from noofnavigations' index) that also appears
# in buyersession.
purchased=[x in buyersession for x in noofnavigations.index.values ]
# Assemble the per-session features into one DataFrame and write it to CSV.
percentile_list = pd.DataFrame({'purchased' : purchased,'duration':duration,'day':day,'month':month,'noofnavigations':noofnavigations,'totalitems':totalitems,'totalcats':totalcats,'timesegment':timesegment,'segmentchange':segmentchange })
percentile_list.to_csv('/tmp/gensim/yoochoose/yoochoose-clicks1001.csv')
Sample data as shown below
sessioid,timestamp,itemid,category
1,2014-04-07T10:51:09.277Z,214536502,0
1,2014-04-07T10:54:09.868Z,214536500,0
1,2014-04-07T10:54:46.998Z,214536506,0
1,2014-04-07T10:57:00.306Z,214577561,0
2,2014-04-07T13:56:37.614Z,214662742,0
2,2014-04-07T13:57:19.373Z,214662742,0
2,2014-04-07T13:58:37.446Z,214825110,0
2,2014-04-07T13:59:50.710Z,214757390,0