I'm a beginner with Python, and it's hard to study alone.
I collected data from Twitter, and I can see the data in the IPython console (Spyder). I want to write the data to a text file, but it doesn't work. My code is below. What should I do to write the data to a text file?
import tweepy
import pandas as pd
# Twitter API credentials (left blank in this listing); they are handed to
# tweepy.OAuthHandler and set_access_token further down.
consumer_key = ''
consumer_skey = ''
access_token = ''
access_stoken = ''
class listener(tweepy.StreamListener):
    """Stream listener that echoes every raw message to the console."""

    def on_data(self, data):
        """Print the raw payload and return it unchanged.

        Nothing is written to disk here; the payload is only printed.
        """
        print (data)
        return data

    def on_err(self, status):
        """Print the given status value.

        NOTE(review): tweepy's StreamListener error hook is named
        ``on_error``; confirm whether ``on_err`` is ever invoked.
        """
        print (status)
# Authenticate and start streaming tweets that match the "siri" keyword.
auth = tweepy.OAuthHandler(consumer_key, consumer_skey)
auth.set_access_token(access_token, access_stoken)
twitterStreaming = tweepy.Stream(auth, listener())
twitterStreaming.filter(track=(["siri"]))
# NOTE(review): this DataFrame is created empty and never receives any tweet
# data, so the to_csv call below has no rows to append to the file.
df = pd.DataFrame()
df.to_csv(r'C:/Users/ID500/Desktop/Sentiment analysis/hi.txt', header=None, index=None, sep=' ', mode='a')
You can open the text file, write the data to it and then close it; you don't need Pandas for that. Also, your listener class is not formatted properly.
Here's the part of the code that you need to modify.
class listener(tweepy.StreamListener):
    """Stream listener that prints each raw message and appends it to a file."""

    def on_data(self, data):
        """Echo *data* to the console and append it to the output file.

        Returns the payload unchanged.
        """
        print (data)
        # write to file here.
        # The context manager closes the file even if the write raises.
        with open("FILE_PATH_HERE", 'a') as out_file:
            out_file.write(data)
        return data

    def on_error(self, status):
        """Print the status value reported by the stream."""
        print (status)

    # Keep the original method name available for any existing caller.
    on_err = on_error
Hope this helps.
Related
As the title says, I have been trying hard to figure out how to save tweets using tweepy in Python 3.6. I found a solution that lets me save them in English, but not in Arabic. Does anyone have any ideas how?
the output I get in the CSV file for Arabic tweets is like this
1510123361.875904::\u0623\u0639\u0648\u0630 \u0628\u0643\u0644\u0645\u0627\u062a \u0627\u0644\u0644\u0647 \u0627\u0644/FMsjMi2nvF
Thank you in advance.
This is my code
# Translation table mapping every code point above U+FFFF to U+FFFD; it is
# applied with str.translate before printing.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# Output file, opened once at module level and written to by the listener.
save = open('ExampleNumber4.csv', mode='w', encoding="utf8", newline=None)
class listener(StreamListener):
    """Stream listener that prints each tweet and appends it to ``save``."""

    def on_data(self, data):
        """Print the tweet text and write a timestamped line to the file.

        Returns True after a successful write. A payload without a
        ``text`` key is ignored and the method returns None.
        """
        try:
            # Parsed text: used for the console output only.
            tweet = json.loads(data)['text']
            print(tweet.translate(non_bmp_map))
            # NOTE: the text written to the file is sliced out of the raw
            # JSON string, so any \uXXXX escapes in the payload are written
            # exactly as they appear there.
            tweet = data.split(',"text":"')[1].split('","source')[0]
            savefile = str(time.time()) + "::" + tweet
            save.write(savefile)
            save.write("\n\n")
            return (True)
        except KeyError:
            pass

    def on_error(self, status):
        """Print the status value reported by the stream."""
        print(status)
# Authenticate, stream tweets matching the Arabic keyword, and close the
# shared output file afterwards.
auth = OAuthHandler (ConsumerKey , ConsumerSecret)
auth.set_access_token(AccessToken , AccessTokenSecret)
twitterStream = Stream(auth , listener())
twitterStream.filter(track=[u'سيارة'])
save.close()
Here's a working solution. Please try to make working examples that produce the error of your question next time by including some sample JSON data and skipping the twitter code that we can't run as is.
#coding:utf8
import sys
import json
import time
import csv
# Sample payload standing in for what the stream delivers: JSON whose
# non-ASCII characters are \u-escaped (the emoji is a surrogate pair).
data = r'{"text": "\u0633\u064a\u0627\u0631\u0629\ud83d\ude00"}' # ASCII JSON
# data = '{"text": "سيارة😀"}' # non-ASCII JSON
# Maps every code point above U+FFFF to U+FFFD; used for console output only.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
# newline='' is what the csv module expects of the file it writes to;
# "utf-8-sig" writes a BOM at the start of the file.
with open('ExampleNumber4.csv', mode='w', encoding="utf-8-sig", newline='') as save:
    writer = csv.writer(save)
    # Use the decoded text from the JSON parser, not a slice of the raw string.
    tweet = json.loads(data)['text']
    print(tweet.translate(non_bmp_map))
    savefile = [time.time(),tweet]
    writer.writerow(savefile)
Output:
1510208283.7488384,سيارة�
I'm trying to write my tweepy results to a CSV file in Arabic, but the output file always contains Unicode escape sequences, for example:
u0646 \u060c \u0627\u0644\u0645\u0646\u062a\u062e\u0628 \u0627\u0644\u0633\u0648\u0631\u064a \u0628\u064a\u0631\u062c\u0639 \u0639\u0628\u064a\u062a\u0648 \u0648\u0645\u0627 \u0628\u0634\u0645 \u0627\u0644\u0645\u0644\u062d\u0642 \u062d\u062a\u0649
my code:
def on_data (self , data):
    """Append a timestamped line for the tweet to ``twitterDB4.csv``.

    Returns True after a successful write. A payload without a ``text``
    key is ignored and the method returns None.
    """
    try:
        # Parsing here only confirms that the payload has a 'text' key.
        tweet = json.loads(data)['text']
        # NOTE: the text that is saved is sliced out of the raw JSON string,
        # so any \uXXXX escapes in the payload are written as they appear.
        tweet = data.split(',"text":"')[1].split('","source')[0]
        savefile = str(time.time()) + "::" + tweet
        # The context manager closes the file even if a write raises.
        with open('twitterDB4.csv', 'a') as save:
            save.write(savefile)
            save.write("\n\n")
        return (True)
    except KeyError:
        pass
I am collecting Twitter data with Python and tweepy. Here is the code:
class listener (StreamListener):
    """Stream listener that appends one CSV row per tweet to ``twitDB.csv``."""

    def on_data(self, raw_data):
        """Parse the payload and append id, date, text and user id as a row.

        The UTF-8 encoded text and the binary append mode are kept as
        written; NOTE(review): both look like Python 2 csv idioms - confirm
        the target interpreter before changing them.
        """
        data = json.loads(raw_data)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(data.keys())
        tweet = data['text'].encode("utf-8")
        tweet_id = data['id']
        time_tweet = data['timestamp_ms']
        # timestamp_ms is in milliseconds; fromtimestamp expects seconds.
        date = datetime.datetime.fromtimestamp(int(time_tweet) / 1000)
        # Keep only the date part of "YYYY-MM-DD HH:MM:SS".
        new_date = str(date).split(" ") [0]
        print(new_date)
        user_id = data['user']['id']
        with open('twitDB.csv','ab') as csvfile:
            myfile = csv.writer(csvfile)
            myfile.writerow([tweet_id,new_date,tweet,user_id])
        return True

    def on_error(self, status_code):
        """Print the status code reported by the stream."""
        print(status_code)
# Authenticate and start streaming tweets that match the "car" keyword.
auth = OAuthHandler(consumer_key,consumer_secret)
auth.set_access_token(access_token,access_token_secret)
twitterStream = Stream(auth,listener())
twitterStream.filter(track=["car"])
But in the CSV file, tweet_id and user_id are shown in a format like 8.85132E+17. How do I resolve this?
Place a tab character in front of tweet_id so that the spreadsheet treats the value as text instead of a number:
# Tweet ids are numeric in the parsed JSON (hence the 8.85132E+17 display),
# so convert to text first: '\t' + <int> would raise TypeError.
tweet_id = '\t' + str(data['id'])
When I run the code,
I want to save the result exactly as it is printed.
But the file my code saves looks different.
How do I fix it? Thank you for your advice.
"run result" =
1 : {'text': 'Today is sunday! https//abcd'}
2 : {'text': 'hi!!!\nhi!!!\nhi!!! https//abcd'}
"Text file saving result"=
Today is sunday! https//abcd
hi!!!
hi!!!
hi!!! https//abcd
import tweepy
import time
import os
import json
import simplejson
# Search parameters and API credentials (left blank in this listing).
search_term = ''
lat = ""
lon = ""
radius = ""
API_key = ""
API_secret = ""
Access_token = ""
Access_token_secret = ""
# geocode string in "lat,lon,radius" form, passed to the search call below.
location = "%s,%s,%s" % (lat, lon, radius)
auth = tweepy.OAuthHandler(API_key, API_secret)
auth.set_access_token(Access_token, Access_token_secret)
api = tweepy.API(auth)
# Cursor over api.search results; iterated with c.items() further down.
c=tweepy.Cursor(api.search,
q="{}".format(search_term),
rpp=100,
geocode=location,
include_entities=False)
# Print every tweet as a numbered dict and save its text to test1.txt.
# The context manager closes the file even if the loop raises.
with open("test1.txt", mode='w', encoding='utf8') as wfile:
    data = {}
    i = 1
    for tweet in c.items():
        data['text'] = tweet.text
        print(i, ":", data)
        # Only the bare text is written, so a tweet containing newlines
        # spans several lines in the file.
        wfile.write(data['text']+'\n')
        # Pause between items to slow down requests to the API.
        time.sleep(0.35)
        i += 1
You need to clarify your question; I guess nobody really knows what exactly you want to achieve. Do you want a JSON-like output of the dictionary in your text file? Give us an example of what your file's content should look like.
Here is my guess: maybe you just want the string representation of your dict. You get the string representation by calling str(data) or data.__str__().
# Print every tweet as a numbered dict and save that dict's string
# representation, one per line, to test1.txt.
# The context manager closes the file even if the loop raises.
with open("test1.txt", mode='w', encoding='utf8') as wfile:
    data = {}
    i = 1
    for tweet in c.items():
        data['text'] = tweet.text
        print(i, ":", data)
        # str(data) keeps newlines inside the text escaped as \n, so each
        # tweet stays on a single line, matching the console output.
        wfile.write(str(data) +'\n')
        # Pause between items to slow down requests to the API.
        time.sleep(0.35)
        i += 1
I have this function for streaming text files:
def txt_response(filename, iterator):
    """Build a streaming text-file download response.

    filename: download name; ``.txt`` is appended when missing, and a
        ``{date}`` placeholder is replaced with today's date.
    iterator: iterable of text lines; each is sent with a CRLF terminator,
        encoded as UTF-8.

    Returns the Response with a Content-Disposition attachment header.
    """
    if not filename.endswith('.txt'):
        filename += '.txt'
    filename = filename.format(date=str(datetime.date.today()).replace(' ', '_'))
    # Append the terminator before encoding: adding a str to the encoded
    # bytes raises TypeError on Python 3.
    # NOTE(review): 'text/plain' is the registered MIME type for plain text;
    # 'text/txt' is kept so existing clients see the same header.
    response = Response(((line + '\r\n').encode('utf-8') for line in iterator), mimetype='text/txt')
    response.headers['Content-Disposition'] = 'attachment; filename={filename}'.format(filename=filename)
    return response
I am working out how to stream a CSV in a similar manner. This page gives an example, but I wish to use the CSV module.
I can use StringIO and create a fresh "file" and CSV writer for each line, but it seems very inefficient. Is there a better way?
According to the answer to "How do I clear a StringIO object?", it is quicker to just create a new StringIO object for each line in the file than to use the method below. However, if you still don't want to create new StringIO instances, you can achieve what you want like this:
import csv

try:
    import StringIO  # Python 2
except ImportError:
    # Python 3 has no StringIO module; alias io so that
    # StringIO.StringIO() resolves to io.StringIO.
    import io as StringIO

from flask import Response
def iter_csv(data):
    """Yield each row of *data* as one CSV-formatted string.

    One in-memory buffer and one csv.writer are reused for every row
    instead of creating a new pair per row.
    """
    line = StringIO.StringIO()
    writer = csv.writer(line)
    for csv_line in data:
        writer.writerow(csv_line)
        # Rewind so read() returns the row that was just written.
        line.seek(0)
        yield line.read()
        # Empty the buffer for the next row.
        line.truncate(0)
        line.seek(0) # required for Python 3
def csv_response(data):
    """Return a streaming CSV download response built from iter_csv(data)."""
    response = Response(iter_csv(data), mimetype='text/csv')
    response.headers['Content-Disposition'] = 'attachment; filename=data.csv'
    return response
If you just want to stream back the results as they are created by csv.writer you can create a custom object implementing an interface the writer expects.
import csv
from flask import Response
class Line(object):
    """Minimal file-like object that remembers only the last string written."""

    def __init__(self):
        # Holds the most recent value passed to write(); None until then.
        self._line = None

    def write(self, line):
        """Store *line*, replacing whatever was stored before."""
        self._line = line

    def read(self):
        """Return the most recently written string (None if nothing yet)."""
        return self._line
def iter_csv(data):
    """Yield each row of *data* as one CSV-formatted string.

    The csv.writer writes into a Line object, which keeps the last string
    it was given; that string is yielded after every row.
    NOTE(review): assumes the writer emits each row with a single write()
    call - confirm for the csv implementation in use.
    """
    line = Line()
    writer = csv.writer(line)
    for csv_line in data:
        writer.writerow(csv_line)
        yield line.read()
def csv_response(data):
    """Return a streaming CSV download response built from iter_csv(data)."""
    response = Response(iter_csv(data), mimetype='text/csv')
    response.headers['Content-Disposition'] = 'attachment; filename=data.csv'
    return response
A slight improvement to Justin's existing great answer: you can take advantage of the fact that a csv writer's writerow() returns the value returned by the underlying file's write call.
import csv
from flask import Response
class DummyWriter:
    """File-like stand-in whose write() hands the text straight back."""

    def write(self, line):
        """Return *line* unchanged instead of storing it anywhere."""
        return line
def iter_csv(data):
    """Yield each row of *data* as one CSV-formatted string.

    Relies on writerow() returning whatever the underlying object's write()
    returns; DummyWriter.write returns the formatted line itself.
    """
    writer = csv.writer(DummyWriter())
    for row in data:
        yield writer.writerow(row)
def csv_response(data):
    """Return a streaming CSV download response built from iter_csv(data)."""
    response = Response(iter_csv(data), mimetype='text/csv')
    response.headers['Content-Disposition'] = 'attachment; filename=data.csv'
    return response
If you are dealing with large amounts of data that you don't want to store in memory, then you could use SpooledTemporaryFile. This would use StringIO until it reaches max_size; after that, it will roll over to disk.
However, I would stick with the recommended answer if you just want to stream back the results as they are created.