How to Automate a Report from Namely Using the API - Python

I want to automate a report I created in Namely using Python. How can I do this with the Namely API?

Here's a Python script I made that should cover it:
#Imports
import http.client
import json
import os
#Constants
namelyDomain = "company.namely.com" #change this to your company's Namely domain
csvName = "C:\\Path\\To_Write\\Your_CSV\\Report.csv" #absolute path to write csv
reportID = "0a12bac7-eac4-4bae-b18f-63ea3173gbb4" #report ID (find in URL)
APIkey = "yuIo4fH7f4z4dgabsSqXzxm9IMbW1ixLhjP0eh8jPuIo9vUI1nij9qZmG822al54" #get this from Namely > API > Personal Access Tokens
#Variables
line = ""
columnCount = 0
#run report with GET request
conn = http.client.HTTPSConnection(namelyDomain)
payload = "{}"
headers = {'authorization': "Bearer " + APIkey}
conn.request("GET", "/api/v1/reports/" + reportID + ".json", payload, headers)
res = conn.getresponse()
if res.status != 200:
    print("failed to connect")
    exit()
data = res.read() #returns JSON object
#Delete if it exists (overwrite)
if os.path.exists(csvName):
    os.remove(csvName)
#make the csv
f = open(csvName, "w")
#get objects to loop from
dataHeader = dataRow = json.loads(data)
#Print headers to CSV
for data in dataHeader['reports'][0]['columns']:
    columnCount = columnCount + 1
    line = line + str(data['label']) + ","
line = line.rstrip(",")
f.write(line + chr(10))
#Print rows to CSV
for data in dataRow['reports'][0]['content']:
    line = '"'
    for ndx in range(0, columnCount):
        line = line + str(data[ndx]) + '","'
    line = line.replace("None", "").replace('\u202d', '').replace('\u202c', '').rstrip('"').rstrip(",")
    f.write(line + chr(10))
f.close()
Just replace:
namelyDomain with your company's Namely domain
csvName with the absolute path where you want to write the CSV report
reportID with the ID of the report you want to generate
APIkey with your personal access token from Namely
Useful Link: https://developers.namely.com/1.0/reports/show-report
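If you'd rather lean on the requests and csv libraries, here is a rough equivalent sketch. It assumes the same report JSON shape the script above relies on (reports[0] with columns/label and content rows); the endpoint and header come from the script, the rest is illustrative:
import csv
import requests

namelyDomain = "company.namely.com"      # your company's Namely domain
reportID = "<your-report-id>"            # placeholder
APIkey = "<your-personal-access-token>"  # placeholder

url = "https://{}/api/v1/reports/{}.json".format(namelyDomain, reportID)
resp = requests.get(url, headers={"authorization": "Bearer " + APIkey})
resp.raise_for_status()  # fail loudly instead of writing an empty csv

report = resp.json()['reports'][0]
with open("Report.csv", "w", newline="") as f:
    writer = csv.writer(f)  # the csv module handles quoting and embedded commas
    writer.writerow([col['label'] for col in report['columns']])
    for row in report['content']:
        writer.writerow(["" if v is None else v for v in row])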

Related

Create new file with another name if the file name already exists in Python

For the moment my script can create a file and its content. However, I would like a way to detect directly in my code whether the file already exists, and if it does, create a new file which will not be Y but Y_2, or Y with the current date, so I can keep a history of the files created.
customername = "X"
workspacename = "Y"
structureDict = {
    "attribute": ["attributes/screens"],
    "content": ["containers",
                "dataProcessing",
                "fields",
                "properties",
                "sources",
                "structures",
                "usages"],
    "types": ["containers/types",
              "dataProcessing/types",
              "fields/types",
              "properties/types",
              "sources/types",
              "structures/types",
              "usages/types"]
}
for category in structureDict:
    for endpoint in structureDict[category]:
        print(category, endpoint)

def writeContentToFile(mode, customername, workspacename, category, endpoint, jsonContent):
    path = os.path.join(os.getcwd(), customername, workspacename, category)
    Path(path).mkdir(parents=True, exist_ok=True)
    with open(path + "/" + endpoint + '.json', mode, encoding='utf-8') as f:
        json.dump(jsonContent, f, ensure_ascii=False, indent=4)

for category in structureDict:
    for endpoint in structureDict[category]:
        endpointFilename = endpoint
        if category in ["attribute", "types"]:
            endpointFilename = endpoint.replace("/", "_")
        url = url_X + endpoint
        params = {"versionId": Workspace_id,
                  "includeAccessData": "true",
                  "includeAttributes": "true",
                  "includeLinks": "true"
                  }
        jsonResponse = requests.get(url, params=params, headers={"Authorization": accessToken}).json()
        writeContentToFile('a', customername, workspacename, category, endpointFilename, jsonResponse)
        try:
            jsonResponsePages = jsonResponse['pages']
            if int(jsonResponsePages) != 1:
                for i in range(2, jsonResponsePages + 1, 1):
                    params["page"] = str(i)
                    jsonResponse = requests.get(url=url, params=params, headers={"Authorization": accessToken}).json()['results']
                    writeContentToFile('a', customername, workspacename, category, endpointFilename, jsonResponse)
        except:
            print(endpoint)
I don't think I understood your question well, but from what I got I can help you this much:
To check whether a file exists you can use os.path.exists(path_to_file), and putting it in a while loop gives you the ability to check for the existence of your file name and assign a new one (Y_2) if needed:
def writeContentToFile(mode, customername, workspacename, category, endpoint, jsonContent):
    path = os.path.join(os.getcwd(), customername, workspacename, category)
    Path(path).mkdir(parents=True, exist_ok=True)
    c = 1
    while os.path.exists(path + "/" + (endpoint if c == 1 else endpoint + f'_{c}') + '.json'):
        c += 1
    with open(path + "/" + (endpoint if c == 1 else endpoint + f'_{c}') + '.json', mode, encoding='utf-8') as f:
        json.dump(jsonContent, f, ensure_ascii=False, indent=4)
You just need to make a small change in your writeContentToFile function:
import os
import json
from datetime import datetime
from pathlib import Path

def writeContentToFile(mode, customername, workspacename, category, endpoint, jsonContent):
    path = os.path.join(os.getcwd(), customername, workspacename, category)
    Path(path).mkdir(parents=True, exist_ok=True)
    date = datetime.now().strftime("%Y_%m_%d")  # getting the current date
    new_endpoint = endpoint  # creating a new endpoint value
    index = 2  # setting the counter to 2
    # keep on checking Y_DATE_2, Y_DATE_3, ... until the filename is unique
    while os.path.exists(os.path.join(path, new_endpoint + '.json')):
        new_endpoint = endpoint + '_' + date + '_' + str(index)
        index += 1
    with open(os.path.join(path, new_endpoint + '.json'), mode, encoding='utf-8') as f:
        json.dump(jsonContent, f, ensure_ascii=False, indent=4)
Explanation:
First we get the current date, as you need; then we create a new variable new_endpoint, and we check whether the current filename exists. While it exists, we keep appending the date and _2, _3, ... to the filename until we have a filename which is unique.
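For example (a hypothetical run, assuming the function above and the 'containers' endpoint from the question), two calls on the same day would produce:
# first call writes  .../content/containers.json
writeContentToFile('a', 'X', 'Y', 'content', 'containers', {"page": 1})
# second call finds containers.json taken and writes
# .../content/containers_<run date>_2.json
writeContentToFile('a', 'X', 'Y', 'content', 'containers', {"page": 2})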

Using Textract, how do you extract tables from a PDF file and output them into a CSV file via a .py script?

I want to use Textract (via the AWS CLI) to extract tables from a PDF file (located in an S3 location) and export them into a CSV file. I have tried writing a .py script but am struggling to read from the file.
Any suggestions for writing the .py script are welcome.
This is my current script. I run into the error:
File "extract-table.py", line 63, in get_table_csv_results
bash: File: command not found
blocks=response['Blocks']
KeyError: 'Blocks'
import webbrowser, os
import json
import boto3
import io
from io import BytesIO
import sys
from pprint import pprint

def get_rows_columns_map(table_result, blocks_map):
    rows = {}
    for relationship in table_result['Relationships']:
        if relationship['Type'] == 'CHILD':
            for child_id in relationship['Ids']:
                cell = blocks_map[child_id]
                if cell['BlockType'] == 'CELL':
                    row_index = cell['RowIndex']
                    col_index = cell['ColumnIndex']
                    if row_index not in rows:
                        # create new row
                        rows[row_index] = {}
                    # get the text value
                    rows[row_index][col_index] = get_text(cell, blocks_map)
    return rows

def get_text(result, blocks_map):
    text = ''
    if 'Relationships' in result:
        for relationship in result['Relationships']:
            if relationship['Type'] == 'CHILD':
                for child_id in relationship['Ids']:
                    word = blocks_map[child_id]
                    if word['BlockType'] == 'WORD':
                        text += word['Text'] + ' '
                    if word['BlockType'] == 'SELECTION_ELEMENT':
                        if word['SelectionStatus'] == 'SELECTED':
                            text += 'X '
    return text

def get_table_csv_results(file_name):
    with open(file_name, 'rb') as file:
        img_test = file.read()
        bytes_test = bytearray(img_test)
        print('Image loaded', file_name)
    # process using image bytes
    # get the results
    client = boto3.client('textract')
    # Response
    response = client.start_document_text_detection(
        DocumentLocation={
            'S3Object': {
                'Bucket': s3BucketName,
                'Name': documentName
            }
        })
    # Get the text blocks
    blocks = response['Blocks']
    pprint(blocks)
    blocks_map = {}
    table_blocks = []
    for block in blocks:
        blocks_map[block['Id']] = block
        if block['BlockType'] == "TABLE":
            table_blocks.append(block)
    if len(table_blocks) <= 0:
        return "<b> NO Table FOUND </b>"
    csv = ''
    for index, table in enumerate(table_blocks):
        csv += generate_table_csv(table, blocks_map, index + 1)
        csv += '\n\n'
    return csv

def generate_table_csv(table_result, blocks_map, table_index):
    rows = get_rows_columns_map(table_result, blocks_map)
    table_id = 'Table_' + str(table_index)
    # get cells.
    csv = 'Table: {0}\n\n'.format(table_id)
    for row_index, cols in rows.items():
        for col_index, text in cols.items():
            csv += '{}'.format(text) + ","
        csv += '\n'
    csv += '\n\n\n'
    return csv

def main(file_name):
    table_csv = get_table_csv_results(file_name)
    output_file = 'output.csv'
    # replace content
    with open(output_file, "wt") as fout:
        fout.write(table_csv)
    # show the results
    print('CSV OUTPUT FILE: ', output_file)

# Document
s3BucketName = "chrisyou.sagemi.com"
documentName = "DETAIL.pdf"

if __name__ == "__main__":
    file_name = sys.argv[1]
    main(file_name)
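As an aside, the KeyError itself is expected here: start_document_text_detection is asynchronous and returns only a JobId, so its response has no 'Blocks' key; the blocks only come back later from get_document_text_detection, or directly from a synchronous call. A minimal synchronous sketch using analyze_document (assuming a single-page document in a format the synchronous API accepts, and the same bucket/key variables as above):
import boto3

client = boto3.client('textract')
# synchronous call: the response carries Blocks directly
response = client.analyze_document(
    Document={'S3Object': {'Bucket': s3BucketName, 'Name': documentName}},
    FeatureTypes=['TABLES'])  # request TABLE/CELL blocks
blocks = response['Blocks']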
There is a much simpler way using the Amazon Textract Textractor library: pip install amazon-textract-textractor
This will create a CSV per table in your PDF document, e.g. output_p0_t0.csv
from textractor import Textractor

def extract_tables(s3_file_path, output_directory, s3_output_path):
    extractor = Textractor(profile_name="default")
    document = extractor.start_document_analysis(s3_file_path, textractor.data.constants.TextractFeatures.TABLES, s3_output_path)
    for j, page in enumerate(document.pages):
        for i, table in enumerate(document.tables):
            with open(output_directory + f'/output_p{j}_t{i}.csv', 'w') as csv_file:
                csv_file.write(table.to_csv())
    return document

document = extract_tables('s3://<INPUT_FILE.PDF>', './<LOCAL_DIRECTORY_FOR_CSV>', 's3://<TEXTRACT_OUTPUT_DIRECTORY>')
I had to make slight changes to @Thomas's answer, instantiating extractor = Textractor(profile_name="default") right after importing Textractor, as shown below, to avoid getting this error: NameError: name 'textractor' is not defined.
from textractor import Textractor
import textractor.data.constants  # binds the textractor name used below

extractor = Textractor(profile_name="default")

def extract_tables(s3_file_path, output_directory, s3_output_path):
    document = extractor.start_document_analysis(s3_file_path, textractor.data.constants.TextractFeatures.TABLES, s3_output_path)
    for j, page in enumerate(document.pages):
        for i, table in enumerate(document.tables):
            with open(output_directory + f'/output_p{j}_t{i}.csv', 'w') as csv_file:
                csv_file.write(table.to_csv())
    return document

document = extract_tables('s3://<INPUT_FILE.PDF>', './<LOCAL_DIRECTORY_FOR_CSV>', 's3://<TEXTRACT_OUTPUT_DIRECTORY>')
Hope it helps someone out there.

Trying to download an image in Python but it doesn't work

I am trying to make a script in Python that downloads an image from a Discord server, but it doesn't work.
My code:
import http.client, json, calendar, os, time, base64
from datetime import datetime

app_token = "token"
channel_id = "id"
web_hk = "api/webhooks/my webhook"
latest_timestamp = ""

def query_server():
    global app_token, channel_id, latest_timestamp
    response = 'Processing...'
    conn = http.client.HTTPSConnection("discordapp.com")
    headers = {"authorization": "Bot " + app_token}
    conn.request("GET", "/api/channels/" + channel_id + "/messages", "", headers)
    r1 = conn.getresponse()
    status = r1.reason
    print(status)
    r = r1.read()
    print(r)
    conversation = json.loads(r.decode('utf-8'))
    # print(json.dumps(conversation, indent=4, sort_keys=True))
    i = 0
    while i < len(conversation):
        comment = conversation[i]
        i += 1
        timestamp = comment["timestamp"]
        if timestamp <= latest_timestamp:
            break
        print(comment)
        if comment['content'] == 'Go!':
            print('parsing command')
            style_comment = conversation[i]
            if style_comment['attachments'] == []:
                response = 'Missing style image'
                break
            content_comment = conversation[i + 1]
            if content_comment['attachments'] == []:
                response = 'Missing content image'
                break
            conn = http.client.HTTPSConnection("cdn.discordapp.com")
            current_time_int = str(int(time.mktime(datetime.utcnow().timetuple())))
            # download style image
            url = style_comment['attachments'][0]['url']
            img_path = url.split("https://cdn.discordapp.com")[1]
            t = url.split("/")
            style_img_filename = current_time_int + "-" + t[-1]
            conn.request("GET", img_path, "", headers)
            r1 = conn.getresponse().read()
            style_file = open(style_img_filename, "wb")
            style_file.write(base64.encodebytes(r1))
            style_file.close()
            os.chmod(style_img_filename, 0o777)
            # download content image
            url = content_comment['attachments'][0]['url']
            img_path = url.split("https://cdn.discordapp.com")[1]
            t = url.split("/")
            content_img_filename = current_time_int + "-" + t[-1]
            conn.request("GET", img_path, "", headers)
            r1 = conn.getresponse().read()
            content_file = open(content_img_filename, "wb")
            content_file.write(base64.encodebytes(r1))
            content_file.close()
            os.chmod(content_img_filename, 0o777)
            output_img_filename = current_time_int + "-output.jpg"
            cmd = "python neural_style.py --content {} --styles {} --output {} --width 500".format(content_img_filename, style_img_filename, output_img_filename)
            print(cmd)
            os.system(cmd)
            break
    print(response)

query_server()
What I get
Traceback (most recent call last):
  File "neural_style.py", line 216, in <module>
    main()
  File "neural_style.py", line 119, in main
    content_image = imread(options.content)
  File "neural_style.py", line 201, in imread
    img = scipy.misc.imread(path).astype(np.float)
  File "C:\Users\Baxter\AppData\Local\Programs\Python\Python35\lib\site-packages\numpy\lib\utils.py", line 101, in newfunc
    return func(*args, **kwds)
  File "C:\Users\Baxter\AppData\Local\Programs\Python\Python35\lib\site-packages\scipy\misc\pilutil.py", line 164, in imread
    im = Image.open(name)
  File "C:\Users\Baxter\AppData\Local\Programs\Python\Python35\lib\site-packages\PIL\Image.py", line 2585, in open
    % (filename if filename else fp))
OSError: cannot identify image file '1527709726-madelbrot.jpg'
It technically downloads something, because I see the file name in the folder, but it says it cannot identify it. I can't even open it.
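For what it's worth, the likely culprit is the base64.encodebytes() call: it base64-encodes the HTTP response before writing, so the saved file is base64 text rather than a valid JPEG. A minimal sketch of the write step without the encoding (same variables as in the script above):
# write the raw response bytes straight to disk; the CDN already
# returns the binary image, so no base64 step is needed
r1 = conn.getresponse().read()
with open(style_img_filename, "wb") as style_file:
    style_file.write(r1)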

Copy cell images from Smartsheet using Python

I am trying to make a copy of Smartsheet data on my local disk. I am able to copy all the Smartsheet data except for the cell images. Below is the code I am using. This code works perfectly fine to copy the data, but not the cell images.
NOTE: I am not trying to copy the attachments from Smartsheet; only the cell images and data.
Could someone help me to enhance this code to copy the cell images as well?
import json
import os
import requests
import time

token = "Bearer <TOken>"
backed_up_sheets = {"Attach": 86960044478894, "test2": 6659760455684}
dir = r'C:\Users\\me\SmartSheetsBackup\WorkSheet' + time.strftime("-%m_%d_%Y_%H_%M")
API_URL = "https://api.smartsheet.com/2.0/sheets/"
payload = {"Authorization": token,
           "Accept": "application/vnd.ms-excel,image/*"}
amount = len(backed_up_sheets)
i = 1
for el in backed_up_sheets:
    r = requests.get(API_URL + str(backed_up_sheets[el]), headers=payload)
    if r.status_code != 200:
        print('Some problem with connections please retry later0')
        pass
    if not os.path.exists(dir):
        os.makedirs(dir)
    with open(dir + el + time.strftime("-%m_%d_%Y_%H_%M") + ".xlsx", 'wb') as output:
        output.write(r.content)
    print('Progress in sheets: ' + str(i) + '/' + str(amount))
    i += 1
Here's a complete code sample:
# Download an image in a cell
def download_cell_image(client, sheet_id, row_id, column_id, default_filename):
    # Get desired row
    row = client.Sheets.get_row(sheet_id, row_id)
    cell = row.get_column(column_id)
    image = cell.image
    filename = getattr(image, 'alt_text', default_filename)
    # Obtain a temporary image URL
    imageUrl = client.models.ImageUrl({"imageId": image.id})
    response = client.Images.get_image_urls([imageUrl])
    url = response.image_urls[0].url
    # Download the image
    import requests
    response = requests.get(url)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)
Note that this requires SDK version 1.3.0 or later.
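A hypothetical invocation, assuming client is an authenticated smartsheet.Smartsheet instance and the ids below are placeholders:
import smartsheet

client = smartsheet.Smartsheet("<access_token>")  # placeholder token
download_cell_image(client,
                    4583173393803140,   # hypothetical sheet id
                    2361756178769796,   # hypothetical row id
                    7999093925054340,   # hypothetical column id
                    "cell_image.png")   # fallback name if the image has no alt_text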
The same steps illustrated in the cURL example should work in Python. (Apologies that we don't have a complete published sample; a rough sketch follows the steps below.)
Get the image id from the cell object, as returned from get_sheet
Convert the image id to a download url, using images.get_image_urls (docs)
Download the image from the url, probably using the Requests library.
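Put together, a minimal sketch of those three steps (assuming the smartsheet Python SDK and the Requests library are installed; the token is a placeholder, the sheet id is taken from the question, and the output filenames are illustrative):
import smartsheet
import requests

smart = smartsheet.Smartsheet("<access_token>")  # placeholder token
sheet = smart.Sheets.get_sheet(86960044478894)   # sheet id from the question

for row in sheet.rows:
    for cell in row.cells:
        image = getattr(cell, 'image', None)
        if image is None:
            continue  # most cells have no image
        # Convert the image id to a temporary download URL
        image_url = smart.models.ImageUrl({"imageId": image.id})
        url = smart.Images.get_image_urls([image_url]).image_urls[0].url
        # Download the image from the URL
        data = requests.get(url).content
        with open("{}_{}.png".format(row.id, cell.column_id), "wb") as f:
            f.write(data)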

Can’t download YouTube video

I'm having trouble retrieving the YouTube video automatically. Here's the code. The problem is the last part: download = urllib.request.urlopen(download_url).read()
# YouTube video download script
# 10n1z3d[at]w[dot]cn

import urllib.request
import sys

print("\n--------------------------")
print(" YouTube Video Downloader")
print("--------------------------\n")

try:
    video_url = sys.argv[1]
except:
    video_url = input('[+] Enter video URL: ')

print("[+] Connecting...")

try:
    if video_url.endswith('&feature=related'):
        video_id = video_url.split('www.youtube.com/watch?v=')[1].split('&feature=related')[0]
    elif video_url.endswith('&feature=dir'):
        video_id = video_url.split('www.youtube.com/watch?v=')[1].split('&feature=dir')[0]
    elif video_url.endswith('&feature=fvst'):
        video_id = video_url.split('www.youtube.com/watch?v=')[1].split('&feature=fvst')[0]
    elif video_url.endswith('&feature=channel_page'):
        video_id = video_url.split('www.youtube.com/watch?v=')[1].split('&feature=channel_page')[0]
    else:
        video_id = video_url.split('www.youtube.com/watch?v=')[1]
except:
    print("[-] Invalid URL.")
    exit(1)

print("[+] Parsing token...")

try:
    url = str(urllib.request.urlopen('http://www.youtube.com/get_video_info?&video_id=' + video_id).read())
    token_value = url.split('video_id=' + video_id + '&token=')[1].split('&thumbnail_url')[0]
    download_url = "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token_value + "&fmt=18"
except:
    url = str(urllib.request.urlopen('www.youtube.com/watch?v=' + video_id))
    exit(1)

v_url = str(urllib.request.urlopen('http://' + video_url).read())
video_title = v_url.split('"rv.2.title": "')[1].split('", "rv.4.rating"')[0]
if '&quot;' in video_title:
    video_title = video_title.replace('&quot;', '"')
elif '&amp;' in video_title:
    video_title = video_title.replace('&amp;', '&')
print("[+] Downloading " + '"' + video_title + '"...')

try:
    print(download_url)
    file = open(video_title + '.mp4', 'wb')
    download = urllib.request.urlopen(download_url).read()
    print(download)
    for line in download:
        file.write(line)
    file.close()
except:
    print("[-] Error downloading. Quitting.")
    exit(1)

print("\n[+] Done. The video is saved to the current working directory (cwd).\n")
There’s an error message (thanks Wooble):
Traceback (most recent call last):
  File "C:/Python31/MyLib/DrawingBoard/youtube_download-.py", line 52, in <module>
    download = urllib.request.urlopen(download_url).read()
  File "C:\Python31\lib\urllib\request.py", line 119, in urlopen
    return _opener.open(url, data, timeout)
  File "C:\Python31\lib\urllib\request.py", line 353, in open
    response = meth(req, response)
  File "C:\Python31\lib\urllib\request.py", line 465, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Python31\lib\urllib\request.py", line 385, in error
    result = self._call_chain(*args)
  File "C:\Python31\lib\urllib\request.py", line 325, in _call_chain
    result = func(*args)
  File "C:\Python31\lib\urllib\request.py", line 560, in http_error_302
    return self.parent.open(new, timeout=req.timeout)
  File "C:\Python31\lib\urllib\request.py", line 353, in open
    response = meth(req, response)
  File "C:\Python31\lib\urllib\request.py", line 465, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Python31\lib\urllib\request.py", line 391, in error
    return self._call_chain(*args)
  File "C:\Python31\lib\urllib\request.py", line 325, in _call_chain
    result = func(*args)
  File "C:\Python31\lib\urllib\request.py", line 473, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
The code in the original question relies on several assumptions about the content of YouTube pages and URLs (expressed in constructs such as url.split('something=')[1]) which may not always be true. I tested it, and it might even depend on which related videos show on the page. You might have tripped on any of those specificities.
Here's a cleaner version, which uses urllib to parse URLs and query strings, and which successfully downloads a video. I've removed some of the try/except blocks which didn't do much but exit, for clarity. Incidentally, it deals with Unicode video titles by removing non-ASCII characters from the filename to which the video is saved. It also takes any number of YouTube URLs and downloads them all. Finally, it masks its user-agent as Chrome for Mac (which is what I currently use).
#!/usr/bin/env python3
import sys
import urllib.request
from urllib.request import urlopen, FancyURLopener
from urllib.parse import urlparse, parse_qs, unquote

class UndercoverURLopener(FancyURLopener):
    version = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.9 Safari/533.2"

urllib.request._urlopener = UndercoverURLopener()

def youtube_download(video_url):
    video_id = parse_qs(urlparse(video_url).query)['v'][0]
    url_data = urlopen('http://www.youtube.com/get_video_info?&video_id=' + video_id).read()
    url_info = parse_qs(unquote(url_data.decode('utf-8')))
    token_value = url_info['token'][0]
    download_url = "http://www.youtube.com/get_video?video_id={0}&t={1}&fmt=18".format(
        video_id, token_value)
    video_title = url_info['title'][0] if 'title' in url_info else ''
    # Unicode filenames are more trouble than they're worth
    filename = video_title.encode('ascii', 'ignore').decode('ascii').replace("/", "-") + '.mp4'
    print("\t Downloading '{}' to '{}'...".format(video_title, filename))
    try:
        download = urlopen(download_url).read()
        f = open(filename, 'wb')
        f.write(download)
        f.close()
    except Exception as e:
        print("\t Download failed! {}".format(str(e)))
        print("\t Skipping...")
    else:
        print("\t Done.")

def main():
    print("\n--------------------------")
    print(" YouTube Video Downloader")
    print("--------------------------\n")
    video_urls = sys.argv[1:]
    if not video_urls:
        # sys.argv[1:] is simply empty when no arguments are given,
        # so prompt and split the input into a list of URLs
        video_urls = input('Enter (space-separated) video URLs: ').split()
    for u in video_urls:
        youtube_download(u)
    print("\n Done.")

if __name__ == '__main__':
    main()
I'm going to shamelessly plug my script which automates checking for valid formats, automatically choosing the best quality format for a video, and works on both the Flash and HTML5 variants of YouTube pages (as well as Vimeo).
If you wrote that script then please look at my source code for inspiration and feel free to steal some code. I challenge you to write something better; open source thrives on competition!
However, if you copied that script and are just trying to get it working, may I suggest you give my script a try and see if it fares better for you. You can access it both from the command line as a script or even as a module in another Python file.
You may also check youtube-dl, which is written in Python, and see how it's written.
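For reference, a minimal way to drive youtube-dl from Python (assuming pip install youtube-dl; the URL is the sample video used in the answer below):
import youtube_dl

# download one video, naming the file after its title
with youtube_dl.YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=4tAr7tuakt0'])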
It looks like the YouTube guys have changed their algorithms for accessing video files. Instead of "token" they now use a "signature" variable, and "signature" seems to depend on either cookie-stored data or the IP address of the client (in the case of a cookies-disabled browser like urllib in Python 2). Here's a hack I've come up with (URLs are IP address-locked):
#!/usr/bin/python
import re
from urlparse import *
from urllib import *

def yt_url(video_url):
    video_id = parse_qs(urlparse(video_url).query)['v'][0]
    get_vars = parse_qs(unquote(urlopen("http://www.youtube.com/get_video_info?video_id=" + video_id).read()))
    url = get_vars["id"][0].split(",")[1].split("|")[1]
    elements = dict()
    elements["itag"] = get_vars["itag"][0]
    elements["sver"] = get_vars["sver"][0]
    elements["expire"] = get_vars["expire"][0]
    elements["signature"] = get_vars["signature"][0]
    elements["factor"] = get_vars["factor"][0]
    elements["id"] = get_vars["id"][0].split(",")[0]
    elements["key"] = get_vars["key"][0]
    elements["burst"] = get_vars["burst"][0]
    elements["sparams"] = get_vars["sparams"][0]
    elements["algorithm"] = get_vars["algorithm"][0]
    elements["ipbits"] = "8"
    for get_var in elements:
        url += "&" + get_var + "=" + elements[get_var]
    return (get_vars["title"][0], url)

if __name__ == '__main__':
    (title, url) = yt_url("http://www.youtube.com/watch?v=4tAr7tuakt0")
    print "Title: %s" % (title,)
    print "Video: %s" % (url,)
#!/usr/bin/env python
import urllib2, urllib
import re
import os
import sys
import time

linkurl = raw_input('Enter URL:')
linkurl1 = urllib.urlopen(linkurl).read()
file1 = open("index.html", "w")
file1.write(linkurl1)
file1.close()
fname = 'index.html'
## Giving new matrix value to find
find = ("yt.playerConfig =", '"title":')
## File reading programme
with open(fname) as infile:
    for line_no, line in enumerate(infile, 1):
        lline = line.lower()
        if any(word.lower() in lline for word in find):
            y = line.rstrip()
fileurl = y
y1 = y.replace("%3A%2F%2F", "://")
y2 = y1.replace("%2F", "/")
y3 = y2.replace("%3F", "?")
y4 = y3.replace("%3D", "=")
y5 = y4.replace("%26", "&")
y6 = y5.replace("%252", "%2")
y7 = y6.replace("sig", "&signature")
# Display video resolution information
print ""
print "Video resolution: "
print "[46=1080(.webm)]--[37=1080(.mp4)]--[35=480(.flv)]--[36=180(.3gpp)]"
print "[45=720(.webm) ]--[22=720(.mp4) ]--[34=360(.flv)]--[17=144(.3gpp)]"
print "[44=480(.webm) ]--[18=360(.mp4) ]--[5=240(.flv) ]"
print "[43=360(.webm) ]"
print ""
# Programme to get all itag list file
itag = re.findall('itag=(\d+)', y)
print `"itag list= "` + `itag`
resol = raw_input("Type itag number: ")
# Programme to get filename file
fname = 'index.html'
find = (' <title>', '</title>')
with open(fname) as infile:
    for line_no, line in enumerate(infile, 1):
        lline = line.lower()
        if any(word.lower() in lline for word in find):
            y = line.rstrip()
            fileurl1 = y.split(">")[-2]
            filename2 = fileurl1.split('"')[-2]
if resol == '46':
    # Programme to get WebM file in 1080 HD
    y1080_webm = re.findall(r'itag=46(.*?)\u0026quality=hd1080', y7)
    url_1080_webm1 = re.findall(r'\\u0026url=(.*?)\\u0026type', `y1080_webm`)
    signature = re.findall(r'signature=(.*?)\\', `y1080_webm`)
    url_1080_webm2 = `url_1080_webm1`.split("\\")[0]
    url_1080_webm = url_1080_webm2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_1080_webm
    #print url_1080_webm
    ext = ".webm"
elif resol == '37':
    # Programme to get MP4 file in 1080 HD
    y1080_mp4 = re.findall(r'itag=37(.*?)\u0026quality=hd1080', y7)
    url_1080_mp41 = re.findall(r'\\u0026url=(.*?)\\u0026type', `y1080_mp4`)
    signature = re.findall(r'signature=(.*?)\\', `y1080_mp4`)
    url_1080_mp42 = `url_1080_mp41`.split("\\")[0]
    url_1080_mp4 = url_1080_mp42.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_1080_mp4
    #print url_1080_mp4
    ext = ".mp4"
elif resol == '45':
    # Programme to get WebM file in 720 HD
    y720_webm = re.findall(r'itag=45(.*?)\u0026quality=hd720', y7)
    url_720_webm1 = re.findall(r'\\u0026url=(.*?)\\u0026type', `y720_webm`)
    signature = re.findall(r'signature=(.*?)\\', `y720_webm`)
    url_720_webm2 = `url_720_webm1`.split("\\")[0]
    url_720_webm = url_720_webm2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_720_webm
    #print url_720_webm
    ext = ".webm"
elif resol == '22':
    # Programme to get MP4 file in 720 HD
    y720_mp4 = re.findall(r'itag=22(.*?)\u0026quality=hd720', y7)
    url_720_mp41 = re.findall(r'\\u0026url=(.*?)\\u0026type', `y720_mp4`)
    signature = re.findall(r'signature=(.*?)\\', `y720_mp4`)
    url_720_mp42 = `url_720_mp41`.split("\\")[0]
    url_720_mp4 = url_720_mp42.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_720_mp4
    #print url_720_mp4
    ext = ".mp4"
elif resol == '44':
    # Programme to get WebM file in 480 large
    y480_webm = re.findall(r'itag=44(.*?)\u0026quality=large', y7)
    url_480_webm1 = re.findall(r'\\u0026url=(.*?)\\u0026type', `y480_webm`)
    signature = re.findall(r'signature=(.*?)\\', `y480_webm`)
    url_480_webm2 = `url_480_webm1`.split("\\")[0]
    url_480_webm = url_480_webm2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_480_webm
    #print url_480_webm
    ext = ".webm"
elif resol == '35':
    # Programme to get a FLV file in 480 large
    y480_flv = re.findall(r'itag=35(.*?)\u0026quality=large', y7)
    url_480_flv1 = re.findall(r'\\u0026url=(.*?)\\', `y480_flv`)
    signature = re.findall(r'signature=(.*?)\\', `y480_flv`)
    url_480_flv2 = `url_480_flv1`.split("\\")[0]
    url_480_flv = url_480_flv2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_480_flv
    #print url_480_flv
    ext = ".flv"
elif resol == '43':
    # Programme to get WebM file in 360 medium
    y360_webm = re.findall(r'itag=43(.*?)\u0026quality=medium', y7)
    url_360_webm1 = re.findall(r'\\u0026url=(.*?)\\', `y360_webm`)
    signature = re.findall(r'signature=(.*?)\\', `y360_webm`)
    url_360_webm2 = `url_360_webm1`.split("\\")[0]
    url_360_webm = url_360_webm2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_360_webm
    #print url_360_webm
    ext = ".webm"
elif resol == '34':
    # Programme to get FLV file in 360 medium
    y360_flv = re.findall(r'itag=34(.*?)\u0026quality=medium', y7)
    url_360_flv1 = re.findall(r'\\u0026url=(.*?)\\', `y360_flv`)
    signature = re.findall(r'signature=(.*?)\\', `y360_flv`)
    url_360_flv2 = `url_360_flv1`.split("\\")[0]
    url_360_flv = url_360_flv2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_360_flv
    #print url_360_flv
    ext = ".flv"
elif resol == '18':
    # Programme to get MP4 file in 360 medium
    y360_mp4 = re.findall(r'itag=18(.*?)\u0026quality=medium', y7)
    url_360_mp41 = re.findall(r'\\u0026url=(.*?)\\', `y360_mp4`)
    signature = re.findall(r'signature=(.*?)\\', `y360_mp4`)
    url_360_mp42 = `url_360_mp41`.split("\\")[0]
    url_360_mp4 = url_360_mp42.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_360_mp4
    #print url_360_mp4
    ext = ".mp4"
elif resol == '5':
    # Programme to get FLV file in 240 small
    y240_flv = re.findall(r'itag=5(.*?)\u0026quality=small', y7)
    url_240_flv1 = re.findall(r'\\u0026url=(.*?)\\', `y240_flv`)
    signature = re.findall(r'signature=(.*?)\\', `y240_flv`)
    url_240_flv2 = `url_240_flv1`.split("\\")[0]
    url_240_flv = url_240_flv2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_240_flv
    #print url_240_flv
    ext = ".flv"
elif resol == '36':
    # Programme to get 3gpp file in 180 small
    y180_3gpp = re.findall(r'itag=36(.*?)\u0026quality=small', y7)
    url_180_3gpp1 = re.findall(r'\\u0026url=(.*?)\\', `y180_3gpp`)
    signature = re.findall(r'signature=(.*?)\\', `y180_3gpp`)
    url_180_3gpp2 = `url_180_3gpp1`.split("\\")[0]
    url_180_3gpp = url_180_3gpp2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_180_3gpp
    #print url_180_3gpp
    ext = ".3gpp"
elif resol == '17':
    # Programme to get 3gpp file in 144 small
    y144_3gpp = re.findall(r'itag=17(.*?)\u0026quality=small', y7)
    url_144_3gpp1 = re.findall(r'\\u0026url=(.*?)\\', `y144_3gpp`)
    signature = re.findall(r'signature=(.*?)\\', `y144_3gpp`)
    url_144_3gpp2 = `url_144_3gpp1`.split("\\")[0]
    url_144_3gpp = url_144_3gpp2.split("'")[1] + "&signature=" + `signature`.split("'")[1] + "&ptk=machinima"
    url = url_144_3gpp
    #print url_144_3gpp
    ext = ".3gpp"
#newindex = open("index1.txt", 'w')
#newindex.write(y7)
print url
filename = filename2 + ext
print filename
req = urllib2.Request(url, headers={'Range': "bytes=0-838860800"})
data = urllib2.urlopen(req)
print "connected to " "http://" + url.split("/")[2] + "/"
f = open(filename, 'wb')
meta_data = data.info()
file_size = int(meta_data.getheaders("Content-Length")[0])
print "filesize= " + `file_size/1048576` + " MB"
bytes_received = 0
chunk_size = 10240
while True:
    start_time = time.time()
    buffer = data.read(chunk_size)
    if not buffer:
        break
    bytes_received += len(buffer)
    f.write(buffer)
    Td = time.time() - start_time
    speed1 = round(len(buffer)/1024.0, 1)
    speed = round(speed1/Td, 1)
    speed_MB = round(speed/1024.0, 1)
    speed_GB = round(speed_MB/1024.0, 1)
    bytes_received_MB = round(bytes_received/1048576.0, 3)
    percent = bytes_received * 100. / file_size
    if speed < 1:
        speed_byte = round(len(buffer)/Td, 1)
        Tr = (file_size-bytes_received)/(60*speed_byte)
        status = r"[Downloaded=%.3f MB] [%3.2f%%] [speed= %.1f B/s] [eta %1d min] " % (bytes_received_MB, percent, speed_byte, Tr)
    elif speed < 1024:
        Tr = (file_size-bytes_received)/(60*1024*speed)
        status = r"[Downloaded=%.3f MB] [%3.2f%%] [speed= %.1f KB/s] [eta %1d min] " % (bytes_received_MB, percent, speed, Tr)
    elif speed < 1048576:
        Tr = (file_size-bytes_received)/(60*1024*1024*speed_MB)
        status = r"[Downloaded=%.3f MB] [%3.2f%%] [speed= %.1f MB/s] [eta %1d min] " % (bytes_received_MB, percent, speed_MB, Tr)
    else:
        Tr = (file_size-bytes_received)/(60*1024*1024*1024*speed_GB)
        status = r"[Downloaded=%.3f MB] [%3.2f%%] [speed= %.1f GB/s] [eta %1d min] " % (bytes_received_MB, percent, speed_GB, Tr)
    status = status + chr(8) * (len(status) + 1)
    print status,
