unable to write csv file, python - python

Here is my code. I added exception handling for IndexError, but now it is not writing to the CSV file.
import urllib2
import csv
import time
import requests
import os
#a = open(r"C:\Drive F data\Client\Blake\III\2.txt")
a = ['http://www.houzz.com/trk/aHR0cDovL3d3dy5SSUtCLmNvbQ/b020157b98711b4a190eee3331eb0066/ue/MjA5ODg2MQ/1b9b00b9fdc9f270f14688046ef161e2',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5nc2NhYmluZXRyeS5jb20/0323b7db059b9e0357d045685be21a6d/ue/NDY2MjE0/d8815293352eb2a6e40c95060c019697',
'http://www.houzz.com/trk/aHR0cDovL3NpY29yYS5jb20/dc807b3705b95b5da772a7aefe23a803/ue/Njc0NDA4/a73f8bdb38e10abd5899fb5c55ff3548',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5DYXNlRGVzaWduLmNvbQ/d79c6af934e3c815d602c4d79b0d6617/ue/OTY3MDg/ce9a87e31e84871a96bca7538aae9856',
'http://www.houzz.com/trk/aHR0cDovL2phcnJldHRkZXNpZ25sbGMuY29t/9d0009d3544d9c22f6058b20097321b3/ue/MzExNTk1NA/310d49732d317725364368ea3fbfd7c1',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5yb2JlcnRsZWdlcmVkZXNpZ24uY29t/8ac7311be2f794654cefba71474563f7/ue/MTExNTQ4/af201ffdc62de6aba9e2de90f69a770d']
# Append one CSV row per tracking URL in `a`: the original URL ('source') and
# the URL reported by geturl() for the opened resource ('link').
# NOTE(review): indentation was lost in this paste, so the nesting of the
# statements below is not certain -- confirm against the original script.
with open("C:\Drive F data\Blake\III/2.csv", "ab")as export:
names = ['source','link']
writer = csv.DictWriter(export, fieldnames=names)
# The file is opened in append mode ("ab"), so this header row is written
# again on every run.
writer.writeheader()
for each in a:
try:
link = urllib2.urlopen(each).geturl()
except IndexError:
# NOTE(review): if the print/writerow below run after this handler, `link`
# still holds the previous URL's value here (or is unbound for the first URL).
pass
print each, link
writer.writerow({'source':each,'link':link})
After removing the try/except block, it works fine.

I think you are missing escape characters in your file path. Use "C:\\Drive F data\\Blake\\III\\2.csv" instead.

Related

Python Append file should refreshed with new data

I am trying to write my output to a file. My code looks for matching file names and stores them in one file, and similarly stores the unmatched file names in another. The problem is that when I use write mode it overwrites the file, and when I use append mode every run keeps appending the matched file names to it. What I need is for the file to be refreshed whenever the script is run, so that it is loaded with the current data only.
import re
import sys
import os
import glob
import pandas as pd
import logging
# Split the *.csv files in the current directory into names that match the
# abc_sales_<yyyy>-<mm>-<dd>-<nn>_<suffix> pattern and names that do not.
# NOTE(review): indentation was lost in this paste; nesting is inferred.
try:
for file in glob.glob('*.csv'):
r = re.search(r'abc_sales_(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}.csv', file)
if r:
#matched=file
print(f'File matched:{file}')
# 'r+' does not create the file, so bad_lines.txt must already exist.
# stdout is redirected only after the print above has already run.
fp=open('bad_lines.txt', 'r+')
sys.stdout = fp
else:
path=f'File not matched:{file}'
f=open('filenotmatched.txt','a')
# The file is emptied before every write, so only the most recently
# written unmatched name remains in it.
f.seek(0)
f.truncate()
f.write(path+'\n')
f.close()
except Exception as e:
# Every error raised above is discarded silently.
pass
Suggested changes to your code.
import re
import sys
import os
import glob
import pandas as pd
import logging
# Expected sales-file name: abc_sales_<yyyy>-<mm>-<dd>-<nn>_<3-5 chars>.csv
# The dot before "csv" is escaped so it only matches a literal '.'.
SALES_FILE_RE = re.compile(
    r'abc_sales_(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}\.csv'
)

# Both report files are opened in 'w' mode, so every run starts from empty
# files and ends up holding only the results of the current run.
with open('bad_lines.txt', 'w') as f_badlines, \
        open('filenotmatched.txt', 'w') as f_notmatched:
    try:
        for file in glob.glob('*.csv'):
            if SALES_FILE_RE.search(file):
                f_badlines.write(f'File matched:{file}\n')
            else:
                f_notmatched.write(f'File not matched:{file}\n')
    except OSError:
        # Report I/O failures instead of discarding them silently.
        logging.exception('Could not write the match reports')

python write and open temporary csv

Using Python 3 on a windows machine:
I have a function to take a list of lists and open it as a csv file using my default application (excel). Despite closing the file after writing, I get a 'locked for editing' message when excel starts.
def opencsv(data):
"""Save a list of lists as a temporary CSV file and open it via os.startfile."""
import tempfile
import os
import csv
# mkstemp creates the file and returns an already-open OS-level handle
# together with the file's path.
# NOTE(review): `handle` is never closed in this function; the file is
# opened a second time by path just below.
handle, fn = tempfile.mkstemp(suffix='.csv')
with open(fn,"w", encoding='utf8',errors='surrogateescape',\
newline='') as f:
writer=csv.writer(f)
for row in data:
try:
writer.writerow(row)
except Exception as e:
# A row that cannot be written is reported and skipped.
print ('Error in writing row:',e)
f.close()
# NOTE(review): `url` is computed but never used.
url = 'file://' + fn.replace(os.path.sep, '/')
os.startfile(fn)
opencsv([['d1','d2'],['d3','d4','d5']])
How can I fix this?
Answer from swstephe's input:
The issue is that mkstemp opens the file and associates it with an os handle. In my original code I was not closing this file properly. See below for updated code.
def opencsv(data):
    """Write `data` (a list of rows) to a temporary CSV file and open it.

    The path of the temporary file is printed, then the file is handed to
    os.startfile. Rows that fail to write are reported and skipped.
    """
    import csv
    import os
    import tempfile

    fd, csv_path = tempfile.mkstemp(suffix='.csv')
    # Wrap the descriptor that mkstemp already opened instead of opening the
    # path a second time, so leaving the `with` block closes the only handle.
    out = os.fdopen(fd, "w", encoding='utf8', errors='surrogateescape',
                    newline='')
    with out:
        write_row = csv.writer(out).writerow
        for record in data:
            try:
                write_row(record)
            except Exception as err:
                print('Error in writing row:', err)
    print(csv_path)
    os.startfile(csv_path)

Creating a python program that scrapes file from a website

This is what I have so far
import urllib
Champions=["Aatrox","Ahri","Akali","Alistar","Amumu","Anivia","Annie","Ashe","Azir","Blitzcrank","Brand","Braum","Caitlyn","Cassiopeia","ChoGath","Corki","Darius","Diana","DrMundo","Draven","Elise","Evelynn","Ezreal","Fiddlesticks","Fiora","Fizz","Galio","Gangplank","Garen","Gnar","Gragas","Graves","Hecarim","Heimerdinger","Irelia","Janna","JarvanIV","Jax","Jayce","Jinx","Kalista","Karma","Karthus","Kassadin","Katarina","Kayle","Kennen","KhaZix","KogMaw","LeBlanc","LeeSin","Leona","Lissandra","Lucian","Lulu","Lux","Malphite","Malzahar","Maokai","MasterYi","MissFortune","Mordekaiser","Morgana","Nami","Nasus","Nautilus","Nidalee","Nocturne","Nunu","Olaf","Orianna","Pantheon","Poppy","Quinn","Rammus","RekSai","Renekton","Rengar","Riven","Rumble","Ryze","Sejuani","Shaco","Shen","Shyvana","Singed","Sion","Sivir","Skarner","Sona","Soraka","Swain","Syndra","Talon","Taric","Teemo","Thresh","Tristana","Trundle","Tryndamere","TwistedFate","Twitch","Udyr","Urgot","Varus","Vayne","Veigar","VelKoz","Vi","Viktor","Vladimir","Volibear","Warwick","Wukong","Xerath","XinZhao","Yasuo","Yorick","Zac","Zed","Ziggs","Zilean","Zyra"]
# Walk the Champions list by index and download each champion's
# <name>_lane_scrape.json file with urlretrieve.
currentCount=0
while currentCount < len(Champions):
# NOTE(review): the destination path is a non-raw string containing
# backslashes, and each "\ " piece puts a backslash followed by a space
# into the resulting path -- confirm that this is what was intended.
urllib.urlretrieve("http://www.lolflavor.com/champions/"+Champions[currentCount]+ "/Recommended/"+Champions[currentCount]+"_lane_scrape.json","C:\Users\Jay\Desktop\LolFlavor\ " +Champions[currentCount]+ "\ "+Champions[currentCount]+ "_lane_scrape.json")
currentCount+=1
What the program is meant to do is use the list and currentCount to get the champion, then go to the website, e.g. for "Aatrox" http://www.lolflavor.com/champions/Aatrox/Recommended/Aatrox_lane_scrape.json, and then download the file and store it in the folder, in this case as LolFlavor/Aatrox/Aatrox_lane_scrape.json.
The bit which is Aatrox changes depending on the champion.
Can anyone help me to get it to work?
EDIT: CURRENT CODE WITH VALUE ERROR:
import json
import os
import requests
Champions=["Aatrox","Ahri","Akali","Alistar","Amumu","Anivia","Annie","Ashe","Azir","Blitzcrank","Brand","Braum","Caitlyn","Cassiopeia","ChoGath","Corki","Darius","Diana","DrMundo","Draven","Elise","Evelynn","Ezreal","Fiddlesticks","Fiora","Fizz","Galio","Gangplank","Garen","Gnar","Gragas","Graves","Hecarim","Heimerdinger","Irelia","Janna","JarvanIV","Jax","Jayce","Jinx","Kalista","Karma","Karthus","Kassadin","Katarina","Kayle","Kennen","KhaZix","KogMaw","LeBlanc","LeeSin","Leona","Lissandra","Lucian","Lulu","Lux","Malphite","Malzahar","Maokai","MasterYi","MissFortune","Mordekaiser","Morgana","Nami","Nasus","Nautilus","Nidalee","Nocturne","Nunu","Olaf","Orianna","Pantheon","Poppy","Quinn","Rammus","RekSai","Renekton","Rengar","Riven","Rumble","Ryze","Sejuani","Shaco","Shen","Shyvana","Singed","Sion","Sivir","Skarner","Sona","Soraka","Swain","Syndra","Talon","Taric","Teemo","Thresh","Tristana","Trundle","Tryndamere","TwistedFate","Twitch","Udyr","Urgot","Varus","Vayne","Veigar","VelKoz","Vi","Viktor","Vladimir","Volibear","Warwick","Wukong","Xerath","XinZhao","Yasuo","Yorick","Zac","Zed","Ziggs","Zilean","Zyra"]
# (local path template, URL template) for each build file fetched per
# champion, in the order they are downloaded.
SCRAPE_TARGETS = [
    (r"C:\Users\Jay\Desktop\LolFlavor\{}\Recommended\{}_lane_scrape.json",
     "http://www.lolflavor.com/champions/{}/Recommended/{}_lane_scrape.json"),
    (r"C:\Users\Jay\Desktop\LolFlavor\{}\Recommended\{}_jungle_scrape.json",
     "http://www.lolflavor.com/champions/{}/Recommended/{}_jungle_scrape.json"),
    (r"C:\Users\Jay\Desktop\LolFlavor\{}\Recommended\{}_support_scrape.json",
     "http://www.lolflavor.com/champions/{}/Recommended/{}_support_scrape.json"),
    (r"C:\Users\Jay\Desktop\LolFlavor\{}\Recommended\{}_aram_scrape.json",
     "http://www.lolflavor.com/champions/{}/Recommended/{}_aram_scrape.json"),
]

for champ in Champions:
    # Make sure the champion's Recommended folder exists before writing into it.
    os.makedirs("C:\\Users\\Jay\\Desktop\\LolFlavor\\{}\\Recommended".format(champ), exist_ok=True)
    for path_template, url_template in SCRAPE_TARGETS:
        # The output file is opened first, then the JSON is fetched and dumped.
        with open(path_template.format(champ, champ), "w") as f:
            r = requests.get(url_template.format(champ, champ))
            json.dump(r.json(), f)
import requests
Champions=["Aatrox","Ahri","Akali","Alistar","Amumu","Anivia","Annie","Ashe","Azir","Blitzcrank","Brand","Braum","Caitlyn","Cassiopeia","ChoGath","Corki","Darius","Diana","DrMundo","Draven","Elise","Evelynn","Ezreal","Fiddlesticks","Fiora","Fizz","Galio","Gangplank","Garen","Gnar","Gragas","Graves","Hecarim","Heimerdinger","Irelia","Janna","JarvanIV","Jax","Jayce","Jinx","Kalista","Karma","Karthus","Kassadin","Katarina","Kayle","Kennen","KhaZix","KogMaw","LeBlanc","LeeSin","Leona","Lissandra","Lucian","Lulu","Lux","Malphite","Malzahar","Maokai","MasterYi","MissFortune","Mordekaiser","Morgana","Nami","Nasus","Nautilus","Nidalee","Nocturne","Nunu","Olaf","Orianna","Pantheon","Poppy","Quinn","Rammus","RekSai","Renekton","Rengar","Riven","Rumble","Ryze","Sejuani","Shaco","Shen","Shyvana","Singed","Sion","Sivir","Skarner","Sona","Soraka","Swain","Syndra","Talon","Taric","Teemo","Thresh","Tristana","Trundle","Tryndamere","TwistedFate","Twitch","Udyr","Urgot","Varus","Vayne","Veigar","VelKoz","Vi","Viktor","Vladimir","Volibear","Warwick","Wukong","Xerath","XinZhao","Yasuo","Yorick","Zac","Zed","Ziggs","Zilean","Zyra"]
for champ in Champions:
    lane_url = "http://www.lolflavor.com/champions/{}/Recommended/{}_lane_scrape.json".format(champ, champ)
    # Fetch the champion's lane file and print its decoded JSON body.
    print(requests.get(lane_url).json())
If you want to save each response to a file, dump the JSON:
import json
import simplejson
for champ in Champions:
    url = "http://www.lolflavor.com/champions/{}/Recommended/{}_lane_scrape.json".format(champ, champ)
    r = requests.get(url)
    try:
        data = r.json()
    except ValueError:
        # The body was not valid JSON (for example an HTML 404 page).
        # ValueError is the base class of the JSON decode errors raised by
        # r.json(). Print the offending URL and move on to the next champion.
        print(r.url)
        continue
    # Open the output file only once there is valid data to write, so a
    # failed request does not leave an empty file behind.
    with open(r"C:\Users\Jay\Desktop\LolFlavor\{}_lane_scrape.json".format(champ), "w") as f:
        json.dump(data, f)
The error comes from a 404 (File or directory not found): one of your calls fails, so there is no valid JSON to decode.
The offending url is:
u'http://www.lolflavor.com/champions/Wukong/Recommended/Wukong_lane_scrape.json'
which, if you try it in your browser, will also give you a 404 error. That is caused by the fact that there is no user Wukong, which can be confirmed by opening http://www.lolflavor.com/champions/Wukong/ in your browser.
There is no need to index the list using a while loop; simply iterate over the list items directly and use str.format to pass the variables into the URL. Also make sure you use a raw string (the r prefix) for the file path when it contains backslashes: they have a special meaning in Python, where they are used to escape characters, so \n or \r etc. in your paths would cause problems. You can also use / or escape each backslash as \\.

Python not saving files to a different folder than where the python file is on Ubuntu 11.10

So I am pulling jpg's from a url. I am able to save the image files as long as they are being saved to the same folder the python file is in. As soon as I attempt to change the folder(seen here as the outpath) the image files do not get created. I imagine it has something to do with my outpath, but it seems to be fine when I am printing and watching it in the console.
Ubuntu 11.10 OS by the way. I'm a newbie with both linux and python, so it could easily be either. :)
If I were to print the sequence taken from the CSV file it would look like: [['Champ1', 'Subname1', 'imgurl1'],['Champ2', 'subname2', 'imgurl2'],['Champ3','subname3','imgurl3']...]
(It was scraped from a website)
import csv
from urlparse import urlsplit
from urllib2 import urlopen, build_opener
from urllib import urlretrieve
import webbrowser
import os
import sys
reader = csv.reader(open('champdata.csv', "rb"), delimiter = ",", skipinitialspace=True)
champInfo = []
for champs in reader:
champInfo.append(champs)
size = len(champInfo)
# Download the image whose URL is in column 2 of each row of x and save it
# under out_folder, using the last component of the URL as the file name.
def GetImages(x, out_folder="/home/sean/Home/workspace/CP/images"):
b=1
# The loop bound comes from the global champInfo, not from the argument x.
size = len(champInfo)
print size
while b < size:
# NOTE(review): pop(b) removes the row from x while b also advances below,
# so rows appear to be skipped and x shrinks under the fixed `size` bound --
# confirm the intended iteration.
temp_imgurls = x.pop(b)
filename = os.path.basename(temp_imgurls[2])
print filename
outpath = os.path.join(out_folder, filename)
print outpath
u = urlopen(temp_imgurls[2])
# open() does not create missing directories; out_folder must already exist.
localFile = open(outpath, 'wb')
localFile.write(u.read())
localFile.close()
b+=1
GetImages(champInfo)
I understand it's quite crude, but it does work, only if I'm not attempting to change the save path.
Try providing the complete image path everywhere
E:/../home/sean/Home/workspace/CD/images
def GetImages(x):
    """Download the image referenced by each row of x into the current directory.

    x is a sequence of rows whose third column (index 2) holds the image URL.
    Row 0 is skipped, as in the original loop, which started at index 1.
    Each file is named after the last path component of its URL.
    """
    import os

    print(len(x))
    for row in x[1:]:
        img_url = row[2]
        # Use only the final path component: a full URL is not a valid
        # file name.
        filename = os.path.basename(img_url)
        u = urlopen(img_url)
        try:
            with open(filename, 'wb') as localFile:
                localFile.write(u.read())
        finally:
            # Release the connection even if writing the file fails.
            u.close()
This code will save the files in the same directory as the script.
Updated Answer:
I think the answer to your problem is just to add a check for the output directory existence, and create it if needed. ie, add:
# Create the output directory if it is missing, before any file is saved
# into it.
if not os.path.exists(out_folder):
os.makedirs(out_folder)
to your existing code.
More generally , you could try something more like this:
import csv
from urllib2 import urlopen
import os
import sys
default_outfolder = "/home/sean/Home/workspace/CD/images"
# simple arg passing without error checking
out_folder = sys.argv[1] if len(sys.argv) == 2 else default_outfolder
if not os.path.exists(out_folder):
    os.makedirs(out_folder)  # creates out_folder, including any required parent ones
elif not os.path.isdir(out_folder):
    raise RuntimeError('output path must be a directory')

# Read the CSV inside a `with` block so the file is closed once every row
# has been processed.
with open('champdata.csv', "rb") as csv_file:
    reader = csv.reader(csv_file, delimiter=",", skipinitialspace=True)
    for champs in reader:
        # The third column of each row holds the image URL.
        img_url = champs[2]
        filename = os.path.basename(img_url)
        outpath = os.path.join(out_folder, filename)
        print('Downloading %s to %s' % (img_url, outpath))
        # Open the URL before creating the output file, so a failed request
        # does not leave an empty file behind.
        u = urlopen(img_url)
        try:
            with open(outpath, 'wb') as f:
                f.write(u.read())
        finally:
            u.close()
The above code works for champdata.csv of the form stuff,more_stuff,http://www.somesite.com.au/path/to/image.png
but will need to be adapted if I have not understood the actual format of your incoming data.

With regards to urllib AttributeError: 'module' object has no attribute 'urlopen'

import re
import string
import shutil
import os
import os.path
import time
import datetime
import math
import urllib
from array import array
import random
# Fetch a web page with urllib.urlopen and print its raw contents.
filehandle = urllib.urlopen('http://www.google.com/') #open webpage
s = filehandle.read() #read
print s #display
#what i plan to do with it once i get the first part working
#results = re.findall('[<td style="font-weight:bold;" nowrap>$][0-9][0-9][0-9][.][0-9][0-9][</td></tr></tfoot></table>]',s)
#earnings = '$ '
#for money in results:
#earnings = earnings + money[1]+money[2]+money[3]+'.'+money[5]+money[6]
#print earnings
#raw_input()
This is the code that I have so far. I have looked at all the other forum posts that give solutions, such as checking the name of the script (which is parse_Money.py), I have tried doing it with urllib.request.urlopen, and I have tried running it on Python 2.5, 2.6, and 2.7. If anybody has any suggestions they would be really welcome. Thanks, everyone!
--Matt
---EDIT---
I also tried this code and it worked, so I'm thinking it's some kind of syntax error. If anybody with a sharp eye can point it out, I would be very appreciative.
import shutil
import os
import os.path
import time
import datetime
import math
import urllib
from array import array
import random
b = 3
#find URL
URL = raw_input('Type the URL you would like to read from[Example: http://www.google.com/] :')
while b == 3:
#get file name
file1 = raw_input('Enter a file name for the downloaded code:')
filepath = file1 + '.txt'
if os.path.isfile(filepath):
print 'File already exists'
b = 3
else:
print 'Filename accepted'
b = 4
file_path = filepath
#open file
FileWrite = open(file_path, 'a')
#acces URL
filehandle = urllib.urlopen(URL)
#display souce code
for lines in filehandle.readlines():
FileWrite.write(lines)
print lines
print 'The above has been saved in both a text and html file'
#close files
filehandle.close()
FileWrite.close()
It appears that the urlopen function is available in the urllib.request module and not in the urllib module as you're expecting.
Rule of thumb: if you're getting an AttributeError, that field or operation is not present in the particular module.
EDIT - Thanks to AndiDog for pointing out - this is a solution valid for Py 3.x, and not applicable to Py2.x!
The urlopen function is actually in the urllib2 module. Try import urllib2 and use urllib2.urlopen
I see that you are using Python2 or at least intend to use Python2.
urlopen helper function is available in both urllib and urllib2 in Python2.
What you need to do is run this script with the correct version of Python:
C:\Python26\python.exe yourscript.py

Categories

Resources