Using Python, Tweepy, Insert Cursor and Arcpy - python

I am very new to Python, so please forgive my lack of knowledge, but for some reason I cannot get Python to insert rows into my database table. Here is what I have:
# Streams tweets matching the tracked terms and tries to write one table row
# per tweet through an arcpy insert cursor.
import sys, arcpy, datetime, tweepy
consumer_key = " "
consumer_secret = " "
access_token = " "
access_token_secret = " "
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
table = r"C:\....dbf"
# Module-level insert cursor; on_status() below refers to it as `rows`.
rows = arcpy.InsertCursor(table)
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that copies fields of each incoming status into a table row."""
    def on_status(self, status):
        """Extract user, text, coordinates and timestamps from `status` and insert them."""
        try:
            user = status.user.screen_name
            tweet = status.text
            # Indexing status.coordinates fails when the tweet carries no
            # coordinates; the except clause below relies on that.
            coord_x = status.coordinates['coordinates'][0]
            coord_y = status.coordinates['coordinates'][1]
            date_utc = status.created_at
            h_m_s_utc = (str(status.created_at.hour))+':'+(str(status.created_at.minute))+':'+(str(status.created_at.second))
            date_est = datetime.datetime.now()
            # NOTE(review): h_m_s_est is computed but never assigned to the row
            # in the visible code.
            h_m_s_est = (str(date_est.hour))+':'+(str(date_est.minute))+':'+(str(date_est.second))
            # NOTE(review): `row` is never created anywhere in the visible code
            # (no row object is requested from the cursor). Because of the
            # `del row, rows` further down, Python also treats both names as
            # local to this method, so this line raises an UnboundLocalError
            # (a NameError subclass) on every call. The bare `except` hides it,
            # which is presumably why nothing is inserted -- TODO confirm.
            row.user_name=user
            row.tweet=tweet
            row.coord_x=coord_x
            row.coord_y=coord_y
            row.date_utc=date_utc
            row.h_m_s_utc=h_m_s_utc
            row.date_est=date_est
            rows.insertRow(row)
            # NOTE(review): deleting the cursor inside the per-tweet handler
            # would leave nothing to insert with on the next tweet.
            del row, rows
            insert_table= r"C:\....dbf"
            # NOTE(review): `insert_row` is not defined in the visible code.
            insert_row(insert_table)
            print user
            print tweet
        except:
            # If there are no coordinates for a tweet, then pass
            # NOTE(review): this bare except swallows every other error too.
            pass
    def on_error(self, status_code):
        """Report the status code on stderr and keep the stream running."""
        print >> sys.stderr, 'Encountered error with status code:', status_code
        return True # Don't kill the stream
    def on_timeout(self):
        """Report the timeout on stderr and keep the stream running."""
        print >> sys.stderr, 'Timeout...'
        return True # Don't kill the stream
# ----------------Script execution----------------
listener = tweepy.streaming.Stream(auth, CustomStreamListener())
listener.filter(track=[' love ', '#love'])
I am pretty sure it has something to do with the row.rowID thing.
Sorry if it is a disaster! Any help is much appreciated!

It looks like you're forgetting to use the data access (`arcpy.da`) module for the insert cursor.
# Context-manager form: the cursor is bound to `inCursor` inside the block.
# `in_table`, `field_names` and `rows` are placeholders the caller supplies.
with arcpy.da.InsertCursor(in_table, field_names) as inCursor:
    for row in rows:
        inCursor.insertRow(row) # example
-or-
# Explicit form: create the cursor, insert every row, then delete the cursor.
# `in_table`, `field_names` and `rows` are placeholders the caller supplies.
inCursor = arcpy.da.InsertCursor(in_table, field_names)
for row in rows:
    # Insert through the cursor created above (it is named `inCursor`).
    inCursor.insertRow(row) # example
del inCursor # make sure to delete cursor if you do it this way as to avoid data lock.
Also, if you only need the data access module, you can import it directly:
from arcpy import da
For more info, check out:
http://resources.arcgis.com/en/help/main/10.2/index.html#//018w0000000t000000

Related

TabError: inconsistent use of tabs and spaces in indentation (except: ^)

I have the code below, but something is wrong in my main block. Can anyone help me spot the problem? I am stuck on this indentation error: it says an indentation error occurred at `except:` in the main block.
import tweepy
from datetime import datetime,timedelta
import csv
def get_all_tweets(screen_name):
    """Fetch pages of `screen_name`'s timeline and write them to '<screen_name>_tweets.csv'.

    Each CSV row holds the tweet id, creation time and UTF-8 encoded full text.
    """
    # The four `*` values are redacted credentials, not valid Python.
    consumer_key = *
    consumer_secret = *
    access_key = *
    access_secret = *
    #authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit_notify=True)
    noRT = []
    # NOTE(review): search_terms is assigned but never used in this function.
    search_terms = 'superbowl ', 'super bowl ', '#superbowl'
    # Requests at most 20 pages; every keyword below is handed to the cursor.
    for page in tweepy.Cursor(api.user_timeline,
                              screen_name = screen_name,
                              tweet_mode="extended",
                              wait_on_rate_limit=True,
                              include_retweets=False,
                              count=100).pages(20):
        for status in page:
            noRT.append([status.id_str, status.created_at, status.full_text.encode("utf-8")])
            # do your process on status
    # Mode 'w' replaces any existing file for this screen name.
    with open('{}_tweets.csv'.format(screen_name), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(["id","created_at","text"])
        writer.writerows(noRT)
    print('{}_tweets.csv was successfully created.'.format(screen_name))
    pass
# Script entry point: export the timeline of every listed account.
if __name__ == '__main__':
    usernames = ["GregoryBlakley","Minihova","TheAMRCentre","throse_gd","CDCgov","TheAMRCentre","GuyFema","EndGameWW3","ABC","Childishnegrit0","WorldPeace24_7","byetofi","mumabear13"]
    for x in usernames:
        try:
            get_all_tweets(x)
        # NOTE(review): a bare except reports every failure as "does not exist".
        except:
            # NOTE(review): `twitter_id` is not defined in the visible code;
            # the loop variable is `x`.
            print "%s does not exist" % (twitter_id)
            pass
What's wrong with this code?
# Quoted from the question. Shown here indented with spaces only, so the
# original tab/space mix is not visible in this copy.
if __name__ == '__main__':
    usernames = ["GregoryBlakley","Minihova","TheAMRCentre","throse_gd","CDCgov","TheAMRCentre","GuyFema","EndGameWW3","ABC","Childishnegrit0","WorldPeace24_7","byetofi","mumabear13"]
    for x in usernames:
        try:
            get_all_tweets(x)
        except:
            print "%s does not exist" % (twitter_id)
            pass
You're mixing tabs and spaces inside your for loops, e.g.:
Python 3 disallows mixing the use of tabs and spaces for indentation.

Tweepy: crawl live streaming tweets and save in to a .csv file

After reading the Tweepy streaming documentation and going through this example, I tried to write a Tweepy app that crawls live stream data with the Tweepy API and saves it to a .csv file. When I run my code, it produces a CSV file ('OutputStreaming.csv') that contains only the column names ['Date', 'Text', 'Location', 'Number_Follower', 'User_Name', 'Friends_count', 'Hash_Tag'] and none of the streamed tweets. I also tried it the following way (and one other way), but I get the same output:
def on_status(self, status):
    # Mode 'w' truncates the file, so each call starts from an empty file.
    with open('OutputStreaming.csv', 'w') as f:
        # NOTE(review): the '[' on the next line is never closed.
        f.write(['Author,Date,Text')
        writer = csv.writer(f)
        # NOTE(review): there is no comma between strftime(...) and
        # status.text.encode, and `.encode` is referenced without being called.
        # The format string continues on the next line, which must stay at
        # column 0 so the string's content is unchanged.
        writer.writerow([status.created_at.strftime("%Y-%m-%d \
%H:%M:%S")status.text.encode,
                         status.location,
                         status.Number_of_follwers,
                         status.author.screen_name,
                         status.friends_count])
I am stuck and can't figure out where the problem is. My full code looks like this:
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json #data
#Variables that contains the user credentials to access Twitter API
access_token = "***"
access_token_secret = "***"
consumer_key = "***"
consumer_key_secret = "***"
auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)
auth.set_access_token(access_token, access_token_secret)
#setup api
api = tweepy.API(auth)
class CustomStreamListener(tweepy.StreamListener):
    """Stream listener meant to append one CSV row per non-retweet message."""
    def on_data(self,data):
        """Parse the raw payload and append selected fields to OutputStreaming.csv."""
        if data:
            tweet_json = json.loads(data)
            if tweet_json:
                # Skip messages whose text starts with 'RT '.
                if not tweet_json['text'].strip().startswith('RT '):
                    # NOTE(review): `data` is what json.loads() parsed above,
                    # yet the lines below read attributes from `data` rather
                    # than keys from `tweet_json`. The next line also ends
                    # with a stray backtick.
                    Created = data.created_at.strftime("%Y-%m-%d-%H:%M:%S")`
                    Text = data.text.encode('utf8')
                    # NOTE(review): the next four lines call the attribute
                    # with 'utf8' instead of encoding its value.
                    Location = data.location('utf8')
                    Follower = data.Number_of_follwers('utf8')
                    Name = data.author.screen_name('utf8')
                    Friend = data.friends_count('utf8')
                    with open('OutputStreaming.csv', 'a') as f:
                        # NOTE(review): `csv` is not among the visible imports.
                        writer = csv.writer(f)
                        # NOTE(review): `Loaction` differs from `Location`
                        # assigned above, and `status` is not defined here.
                        writer.writerow([Created, Text ,Loaction\
                                         ,Follower ,Name ,Friend,status.entities.get('hashtags')])
                    # NOTE(review): `Time` is not defined in the visible code.
                    Time.sleep(10)
        return True
    def on_error(self, status_code):
        """Return False on status 420; otherwise report the code on stderr."""
        if status_code == 420:
            return False
        else:
            # NOTE(review): `sys` is not among the visible imports.
            print >> sys.stderr, 'Encountered error with status code:',\
                status_code
    def on_timeout(self):
        """Report the timeout on stderr and return True."""
        print >> sys.stderr, 'Timeout...'
        return True
# Writing csv titles
# Mode 'a' appends, so the title row is added each time this code runs.
with open('OutputStreaming.csv', 'a') as f:
    writer = csv.writer(f)
    writer.writerow(['Date', 'Text', 'Location','Number_Follower',
                     'User_Name', 'Friends_count','Hash_Tag'])
if __name__ == '__main__':
    l = CustomStreamListener()
    streamingAPI = tweepy.streaming.Stream(api.auth, l)
    streamingAPI.filter(track=['#Yoga','#Meditation'])
Here is a working code :
#!/usr/bin/python3
# coding=utf-8
"""Append live tweets matching the tracked hashtags to a ';'-separated file."""
import tweepy

# Field separator. It is also replaced inside the tweet text so that a tweet
# can never add extra columns to its row.
SEP = ';'
# NOTE: `csv` is a plain file handle here, not the standard-library csv module.
# Mode 'a' appends, so the header line is written again on every run.
csv = open('OutputStreaming.csv','a')
csv.write('Date' + SEP + 'Text' + SEP + 'Location' + SEP + 'Number_Follower' + SEP + 'User_Name' + SEP + 'Friends_count\n')


class MyStreamListener(tweepy.StreamListener):
    """Stream listener that writes one line per received status."""

    def on_status(self, status):
        """Format the fields of `status` and append them as one line."""
        Created = status.created_at.strftime("%Y-%m-%d-%H:%M:%S")
        # Flatten line breaks and separators so each tweet stays on one row.
        Text = status.text.replace('\n', ' ').replace('\r', '').replace(SEP, ' ')
        Location = ''
        if status.coordinates is not None:
            lon = status.coordinates['coordinates'][0]
            lat = status.coordinates['coordinates'][1]
            # The coordinates are numbers, so convert them before joining;
            # concatenating them directly with ',' raises TypeError.
            Location = str(lat) + ',' + str(lon)
        Follower = str(status.user.followers_count)
        Name = status.user.screen_name
        Friend = str(status.user.friends_count)
        csv.write(Created + SEP + Text + SEP + Location + SEP + Follower + SEP + Name + SEP + Friend + '\n')
        # The file is never closed and the stream runs until interrupted,
        # so flush each row to disk as soon as it is written.
        csv.flush()

    def on_error(self, status_code):
        """Print the status code reported by the stream."""
        print(status_code)


consumer_key = '***'
consumer_secret = '***'
access_token = '***'
access_token_secret = '***'
# stream
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
myStream = tweepy.Stream(auth, MyStreamListener())
myStream.filter(track=['#Yoga','#Meditation'])

Python MYSQL Data Inserted Twice

When a single hash is found in the tweet's URLs, the script inserts the values into the MySQL DB properly. When two or more hashes are found in the tweet's URLs, the records are inserted twice.
For example, if a tweet has 2 URLs that contain hashes, 4 records are created in the MySQL DB.
DB State:
"https://www.virustotal.com/en/file/2819e520dea611c4dd1c3b1fd54adbd0c50963ff75d67cc7facbe2090574afc0/analysis/","2017-09-20 01:00:35","2819e520dea611c4dd1c3b1fd54adbd0c50963ff75d67cc7facbe2090574afc0"
"https://www.virustotal.com/en/file/8084880e875b4dc97ccd9f97249d4c7184f6be092679d2b272ece2890306ca89/analysis/","2017-09-20 01:03:35","8084880e875b4dc97ccd9f97249d4c7184f6be092679d2b272ece2890306ca89"
"https://www.virustotal.com/en/file/b5034183d4d2aca1e586b4a4bf22f32e4204c4b6d288c171d5252636c11248a0/analysis/","2017-09-20 01:03:35","8084880e875b4dc97ccd9f97249d4c7184f6be092679d2b272ece2890306ca89"
"https://www.virustotal.com/en/file/8084880e875b4dc97ccd9f97249d4c7184f6be092679d2b272ece2890306ca89/analysis/","2017-09-20 01:03:35","b5034183d4d2aca1e586b4a4bf22f32e4204c4b6d288c171d5252636c11248a0"
"https://www.virustotal.com/en/file/b5034183d4d2aca1e586b4a4bf22f32e4204c4b6d288c171d5252636c11248a0/analysis/","2017-09-20 01:03:35","b5034183d4d2aca1e586b4a4bf22f32e4204c4b6d288c171d5252636c11248a0"
Any suggestions on how to insert only single entries to DB ?
#! /usr/bin/python
from __future__ import print_function
import tweepy
import json
import MySQLdb
import time
import json, urllib, urllib2, argparse, hashlib, re, sys
from dateutil import parser
# Terms passed to the stream filter at the bottom of the script.
WORDS = ['virustotal']
CONSUMER_KEY = "XXXX"
CONSUMER_SECRET = "YYY"
ACCESS_TOKEN = "AAAA"
ACCESS_TOKEN_SECRET = "DDDDD"
HOST = "192.168.150.1"
USER = "admin"
PASSWD = "admin"
DATABASE = "twitter"
def store_data(values, insert_time, insert_hash):
    """Insert one tweet_url row per entry of `values`.

    Every inserted row carries the same `insert_time` and `insert_hash`.
    A new database connection is opened and closed on each call.
    """
    db=MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE, charset="utf8")
    cursor = db.cursor()
    data = []
    #print(hashes)
    # Pairs EVERY value with the single hash passed in.
    for value in values:
        data.append((value, insert_time, insert_hash))
    cursor.executemany("""INSERT INTO tweet_url VALUES (%s,%s,%s)""",data)
    db.commit()
    cursor.close()
    db.close()
    return
class StreamListener(tweepy.StreamListener):
    """Stream listener that stores the URLs and SHA-256-like hashes found in tweets."""
    def on_connect(self):
        """Announce that the stream connection is established."""
        print("We are now connected to the streaming API.")
    def on_error(self, status_code):
        """Print the status code and return False."""
        print('An Error has occured: ' + repr(status_code))
        return False
    def on_data(self, data):
        """Extract expanded URLs and 64-hex-character hashes, then store them."""
        try:
            datajson = json.loads(data)
            web_url= datajson['entities']['urls']
            #print(web_url)
            urls=[]
            for i in web_url:
                urls.append((i['expanded_url']))
            # One single-item list per expanded URL.
            values = [list([item]) for item in urls]
            # Text form of those lists, joined by commas; used only as the
            # haystack for the regular expression below.
            list_url = ','.join([str(i) for i in values])
            extract_url=str(list_url)
            formatted_url=''.join(extract_url)
            sha256_hash=re.findall(r"([a-fA-F\d]{64})", formatted_url)
            # All matches are concatenated, then split again with the same pattern.
            hashes=''.join(sha256_hash)
            insert_time=time.strftime('%Y-%m-%d %H:%M:%S')
            hash_list=re.findall(r"([a-fA-F\d]{64})", hashes)
            # NOTE(review): store_data() is called once per hash, and each
            # call inserts every entry of `values`. A tweet with N URLs and
            # M hashes therefore produces N * M rows.
            for insert_hash in hash_list:
                store_data(values, insert_time, insert_hash)
            # Prints the function object itself, not a result.
            print(store_data)
            print(hashes)
            print(type(hashes))
        except Exception as e:
            print(e)
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
streamer = tweepy.Stream(auth=auth, listener=listener)
print("Tracking: " + str(WORDS))
streamer.filter(track=WORDS)
You have a first loop :
# Outer loop, quoted from on_data(): one store_data() call per hash.
for insert_hash in hash_list:
    store_data(values, insert_time, insert_hash)
And then you loop again on the values to build the data list of tuples :
# Inner loop, quoted from store_data(): one tuple per value, all with the same hash.
for value in values:
    data.append((value, insert_time, insert_hash))
So the values are called twice.
Maybe you could use zip() or enumerate() to join the hash_list and the values before calling store_data ?
# Pair each URL with the hash at the same position, then store everything in
# one call. `values`, `hash_list` and `insert_time` come from on_data().
data = []
# Only pair them up when there is exactly one hash per URL; otherwise zip()
# would silently drop the unmatched entries.
if len(values) == len(hash_list):
    # `url_hash` rather than `hash`, so the builtin hash() is not shadowed.
    data = [(val, insert_time, url_hash) for val, url_hash in zip(values, hash_list)]
    store_data(data)
And then, no need to loop again inside store_data(), just change the signature to pass directly the data list:
def store_data(data_list):
    """Insert the prepared (value, time, hash) tuples in `data_list` in one batch."""
    # connection to database
    # (the placeholder comment above stands for the connection setup that
    # creates `cursor`; it is not shown in this sketch)
    cursor.executemany("""INSERT INTO tweet_url VALUES (%s,%s,%s)""",data_list)

Get tweets with hashtag from specific time period

I'm new to python programming and Twitter API.
I tried to collect tweets with a hashtag from a specific time period (say 11/24/2016-11/27/2017). My goal is to get the coordinates from those tweets and save the coordinates and the tweet text into a CSV file.
My problem is that I don't know how to set the time filter or how to save the results to a file. Also, only a few tweets contained coordinates; is that common?
Here are the python scripts that i found online.
import json
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
#Enter Twitter API Key information
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''
file = open("C:\\Output.csv", "w") #This script didn't work on my Mac#
# (stray "strong text" placeholder left by the post editor; not part of the script)
# Header row for the two columns written by on_data() below.
file.write("X,Y\n")
data_list = []
# Number of tweets with coordinates written so far.
count = 0
class listener(StreamListener):
    """Stream listener that writes the coordinates of geotagged tweets to `file`."""
    def on_data(self, data):
        """Write one 'lon,lat' line if the tweet has coordinates.

        Returns True while the limit is not exceeded. Otherwise closes the
        file and returns False.
        """
        global count
        #How many tweets you want to find, could change to time based
        # NOTE(review): `<=` with a counter starting at 0 lets 2001 rows through.
        if count <= 2000:
            json_data = json.loads(data)
            coords = json_data["coordinates"]
            # Tweets without coordinates are skipped and not counted.
            if coords is not None:
                print coords["coordinates"]
                lon = coords["coordinates"][0]
                lat = coords["coordinates"][1]
                data_list.append(json_data)
                file.write(str(lon) + ",")
                file.write(str(lat) + "\n")
                count += 1
            return True
        else:
            file.close()
            return False
    def on_error(self, status):
        """Print the status reported by the stream."""
        print status
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
twitterStream = Stream(auth, listener())
#What you want to search for here
twitterStream.filter(track=[""])

Save specific tweet fields in csv file

With the following part of code i open a csv file and write the text of a tweet in the first column
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
ckey = ''
csecret = ''
atoken = ''
asecret = ''
class listener(StreamListener):
    """Stream listener that appends the text of each tweet to tweets3.csv."""
    def on_data(self,data):
        """Cut the tweet text out of the raw payload and append it to the file."""
        try:
            #print data
            # The text is taken by splitting the raw payload string between
            # ',"text":"' and '","source'; the payload is not parsed as JSON.
            text = data.split(',"text":"')[1].split('","source')[0]
            print text
            # Timestamp and text are joined with '::', not a comma.
            saveThis = str (time.time())+'::'+text
            saveFile = open('tweets3.csv','a')
            saveFile.write(saveThis)
            saveFile.write('\n')
            saveFile.close()
        except BaseException, e:
            print 'failed on data',str(e)
            time.sleep(5)
        return True
    def on_error (self,status):
        """Print the status reported by the stream."""
        print status
auth = OAuthHandler (ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
result = twitterStream.filter(track=["zika"], languages=['en'])
my output looks like this
For each tweet, I also want to store the username of its author in column B and that user's number of followers in column C. Can anybody help?

Categories

Resources