Get newest video detail youtube instantaneously

Get newest video detail youtube instantaneously - python

I created a script that checks for the newest video on a channel and comments on it as soon as it finds one. The problem is that the script sometimes detects the video 4 minutes after it is uploaded and sometimes 30 seconds after. I want to detect the video as soon as it is uploaded. The code is below:
import os
import time
from subprocess import Popen

import requests
from Google import Create_Service

# Poll a channel's /videos page and, as soon as the page changes, post a
# comment on the newest upload through the YouTube Data API.

SECRET_FILE = 'client.json'
APINAME = 'youtube'
APIVERSION = 'v3'
SCOPE = ["https://www.googleapis.com/auth/youtube.force-ssl"]
service_y = Create_Service(SECRET_FILE, APINAME, APIVERSION, SCOPE)

channel = "https://www.youtube.com/user/UCBqIaQItMBrQjK6NOXB1eQQ"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
}
comment_text = "First comment. Congrats on 100 Mil Subs - I remember watching you count to 1 million."

# "sds" is a sentinel meaning "no baseline snapshot of the page taken yet".
vid = "sds"

while True:
    try:
        # A timeout turns a stalled connection into an exception instead of
        # blocking the polling loop indefinitely.
        res = requests.get(channel + "/videos", headers=headers, timeout=10)
    except requests.RequestException:
        # Network failure: write a tiny helper that relaunches this script
        # after two seconds, start it, and end the current process.
        # Only request errors are caught, so Ctrl+C still stops the script.
        # The file is closed (flushed) before the helper is launched.
        with open("script2.py", "w") as script2:
            script2.writelines([
                "from subprocess import Popen\n",
                "import time,os\n",
                "time.sleep(2)\n",
                f"Popen('python {os.path.basename(__file__)}')\n",
                "exit(0)",
            ])
        Popen("python script2.py")
        exit(0)

    # The text that follows the first "publishedTimeText" key is used as a
    # fingerprint of the newest entry on the page.
    # NOTE(review): this looks like a relative "... ago" label, which could
    # change without a new upload -- confirm before relying on it.
    pieces = res.text.split("publishedTimeText")
    if len(pieces) < 2:
        # Marker not present in this response; try again on the next pass.
        time.sleep(1)
        continue
    html = pieces[1].split("}")[0]
    print(html)
    time.sleep(1)
    print(vid)

    if vid == html:
        continue
    if vid == "sds":
        # First pass: remember the current state as the baseline.
        vid = html
        continue

    # The page changed: ask the API for the newest video of the channel.
    # type="video" keeps playlists/channels (which have no "videoId") out of
    # the result.
    request = service_y.search().list(
        part="id",
        channelId="UCBqIaQItMBrQjK6NOXB1eQQ",
        order="date",
        type="video",
        maxResults=1,
    )
    response = request.execute()
    items = response.get("items") or []
    if not items:
        # Nothing returned yet; `vid` is unchanged so the next pass retries.
        continue
    vid = items[0]["id"]["videoId"]
    request = service_y.commentThreads().insert(
        part="snippet",
        body={
            "snippet": {
                "videoId": vid,
                "topLevelComment": {
                    "snippet": {
                        "textOriginal": comment_text
                    }
                }
            }
        }
    )
    response = request.execute()
    break
Thanks

Related

How can I add threads to this python code to make multiple request

I am creating a custom login brute-force tool for a web application as part of bug bounty hunting. I came across a bug in one web application that required me to write my own tool. This is not the complete tool, but I need a way to add threads to the current code:
import requests
import re

# Read the candidate values up front and close the file straight away.
with open('password.txt', 'r') as password_file:
    exploit = password_file.readlines()

headers = {
    'Host': 'TARGET.COM',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'close',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'iframe',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1'
}

# The query parameters never change, so build them once outside the loop.
params = {
    'execution': '111u9342',
    'client_id': 'client-23429df',
    'tab_id': '234324',
}

# NOTE: `myname` and `proxies` are not defined in this snippet; they are
# expected to be provided by the rest of the tool.
for line in exploit:
    password = line.strip()
    # verify=False disables TLS certificate checks (typically needed when
    # routing through an intercepting proxy); the timeout keeps one stalled
    # request from blocking the whole run.
    http = requests.post('https://www.target.com/test',
                         params=params,
                         headers=headers,
                         data={'username': myname, 'password': password},
                         verify=False,
                         proxies=proxies,
                         timeout=10)
    content = http.content
print("finished")
I am a beginner in Python.

You can use ThreadPoolExecutor:
from concurrent.futures import ThreadPoolExecutor
import requests
# ....Other code parts...
def base_post(url, header, data, proxies, timeout=10, params=None, verify=True):
    """Send one POST request and return the response object.

    ``params`` and ``verify`` are optional so that every field collected in
    a work item below can be forwarded with ``base_post(**row)``.
    """
    response = requests.post(url, headers=header, params=params, data=data,
                             proxies=proxies, timeout=timeout, verify=verify)
    return response


total_possibilities = []
# Placeholder: fill this with the input lines (for example the lines read
# from the password file in the question).
exploit = []
# Identical for every request, so it is built once.
params = {
    'execution': '111u9342',
    'client_id': 'client-23429df',
    'tab_id': '234324',
}
# NOTE: `headers`, `myname` and `proxies` come from the "other code parts".
for line in exploit:
    password = line.strip()
    # Keys must match base_post's parameter names because each row is
    # expanded with ** when it is submitted to the pool.
    total_possibilities.append({'url': "...",
                                "params": params,
                                "header": headers,
                                "data": {'username': myname, 'password': password},
                                "verify": False,
                                "proxies": proxies})
results = []
with ThreadPoolExecutor(max_workers=3) as executor:
    for row in total_possibilities:
        results.append(executor.submit(base_post, **row))
# `results` holds Future objects; call .result() on one to get its response.
print(results)
Don't forget to update "max_workers" based on your needs.

Scraping Data from booking with python

I hope you're doing well!
I'm trying to scrape data from Booking (hotel name, room, ...). The code runs, but no data ends up in the Excel file; the data file is empty!
This is my code:
# Build the Extractor from the YAML file
e = Extractor.from_yaml_file('C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/booking.yml')


def scrape(url):
    """Fetch *url* and return whatever the module-level extractor ``e`` yields for it."""
    request_headers = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
        # Swap the user agent for another one if requests start being blocked
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Referer': 'https://www.booking.com/index.en-gb.html',
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
    }
    # Fetch the page with requests
    print("Downloading %s" % url)
    page = requests.get(url, headers=request_headers)
    # Run the extractor over the page HTML; base_url is the page's own URL
    extracted = e.extract(page.text, base_url=url)
    return extracted
# Scrape every URL listed in urls.txt and write one CSV row per hotel.
# newline='' is what the csv module expects for files it writes; without it
# extra blank rows can appear on Windows.
with open("C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/urls.txt", 'r') as urllist, \
        open('C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/data.csv', 'w', newline='') as outfile:
    fieldnames = [
        "name",
        "location",
        "price",
        "price_for",
        "room_type",
        "beds",
        "rating",
        "rating_title",
        "number_of_ratings",
        "url"
    ]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
    writer.writeheader()
    for line in urllist:
        # Drop the trailing newline and skip empty lines so that only real
        # URLs are requested.
        url = line.strip()
        if not url:
            continue
        data = scrape(url)
        if data and data['hotels'] is not None:
            for h in data["hotels"]:
                writer.writerow(h)
And this is the result in the Excel file:
There is no error in my code; the only problem is how to get this data.
The booking.yml:

Read URLs from external file

I found the following TikTok Downloader which is working fine.
from argparse import ArgumentParser
import os
from urllib.parse import parse_qsl, urlparse
import requests
class TikTokDownloader:
    """Download the video behind a TikTok page URL.

    The page is requested with the given ``tt_webid`` cookie value, the
    ``playAddr`` URL is read out of the returned page text, and the video
    bytes are written to a local file.
    """

    # Headers sent with both the page request and the video request.
    HEADERS = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'DNT': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Dest': 'video',
        'Referer': 'https://www.tiktok.com/',
        'Accept-Language': 'en-US,en;q=0.9,bs;q=0.8,sr;q=0.7,hr;q=0.6',
        'sec-gpc': '1',
        'Range': 'bytes=0-',
    }

    def __init__(self, url: str, web_id: str):
        """Remember the page URL and build the cookie pair from *web_id*."""
        self.__url = url
        self.__cookies = {
            'tt_webid': web_id,
            'tt_webid_v2': web_id
        }

    def __get_video_url(self) -> str:
        """Return the ``playAddr`` value found in the page text.

        Raises ``IndexError`` when the page does not contain the marker.
        """
        response = requests.get(self.__url, cookies=self.__cookies, headers=TikTokDownloader.HEADERS)
        return response.text.split('"playAddr":"')[1].split('"')[0].replace(r'\u0026', '&')

    def download(self, file_path: str):
        """Download the video to *file_path*.

        If the file already exists the user is asked before anything is
        fetched, so declining the overwrite costs no network traffic.
        """
        if os.path.exists(file_path):
            choice = input('File already exists. Overwrite? (Y/N): ')
            if choice.lower() != 'y':
                return
        video_url = self.__get_video_url()
        url = urlparse(video_url)
        # The query string is passed separately so requests re-encodes it.
        params = tuple(parse_qsl(url.query))
        request = requests.Request(method='GET',
                                   url='{}://{}{}'.format(url.scheme,
                                                          url.netloc, url.path),
                                   cookies=self.__cookies,
                                   headers=TikTokDownloader.HEADERS,
                                   params=params)
        prepared_request = request.prepare()
        # The context manager releases the session's connections afterwards.
        with requests.Session() as session:
            response = session.send(request=prepared_request)
            response.raise_for_status()
        with open(os.path.abspath(file_path), 'wb') as output_file:
            output_file.write(response.content)
def build_parser():
    """Return the command-line parser for the downloader script."""
    parser = ArgumentParser()
    parser.add_argument('--web-id', help='Value of tt_webid or tt_webid_v2 cookie (they are the same).')
    parser.add_argument('-o', '--output', default='download.mp4', help='Full output path.')
    # TikTok profile URLs use "@username"; the help text shows that form.
    parser.add_argument('url', help='Video url (https://www.tiktok.com/@username/video/1234567890123456789 or https://vm.tiktok.com/a1b2c3/).')
    return parser


def main():
    """Parse the command line and download the requested video."""
    args = build_parser().parse_args()
    downloader = TikTokDownloader(args.url, args.web_id)
    downloader.download(args.output)


if __name__ == "__main__":
    main()
The issue is that I have to run this command to download each video:
python3 ./tiktok.py --web-id 1234567890123 -o ./file.mp4 https://vm.tiktok.com/...
And I have 1000 links to download. All the links are in a text file, one per line, without commas. Like:
Https://tiktok.com/1
Https://tiktok.com/2
Https://tiktok.com/3
So I'm looking for a way to read the text file and automatically substitute each link into the command that I have to run. Or should I change the actual script?

Please use my code. I have defined a function that downloads all of those videos; you only need to pass it the path of the file that contains the thousand links. Preferably, save this Python script in the same directory as that file.
Use the function like this:
A_thousand_links_jbsidis("my_file_with_1000_links.txt")
It automatically names each video based on the date and time. I tested it and it works!
Here is the code by jbsidis:
from argparse import ArgumentParser
import os
from urllib.parse import parse_qsl, urlparse
import requests
class TikTokDownloaderjbsidis:
    """Download the video behind a TikTok page URL without prompting.

    An existing file at the target path is overwritten.
    """

    # Headers sent with both the page request and the video request.
    HEADERS = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'DNT': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Dest': 'video',
        'Referer': 'https://www.tiktok.com/',
        'Accept-Language': 'en-US,en;q=0.9,bs;q=0.8,sr;q=0.7,hr;q=0.6',
        'sec-gpc': '1',
        'Range': 'bytes=0-',
    }

    def __init__(self, url, web_id):
        """Remember the page URL and build the cookie pair from *web_id*."""
        self.__url = url
        self.__cookies = {
            'tt_webid': web_id,
            'tt_webid_v2': web_id
        }

    def __get_video_url(self) -> str:
        """Return the ``playAddr`` value found in the page text.

        Raises ``IndexError`` when the page does not contain the marker.
        """
        response = requests.get(self.__url, cookies=self.__cookies, headers=TikTokDownloaderjbsidis.HEADERS)
        return response.text.split('"playAddr":"')[1].split('"')[0].replace(r'\u0026', '&')

    def download(self, file_path: str):
        """Download the video and write it to *file_path*."""
        video_url = self.__get_video_url()
        url = urlparse(video_url)
        # The query string is passed separately so requests re-encodes it.
        params = tuple(parse_qsl(url.query))
        request = requests.Request(method='GET',
                                   url='{}://{}{}'.format(url.scheme,
                                                          url.netloc, url.path),
                                   cookies=self.__cookies,
                                   headers=TikTokDownloaderjbsidis.HEADERS,
                                   params=params)
        prepared_request = request.prepare()
        # The context manager releases the session's connections afterwards.
        with requests.Session() as session:
            response = session.send(request=prepared_request)
            response.raise_for_status()
        # The existence check of the original only assigned an unused
        # string, so it is dropped; the file is always (over)written.
        print("Downloading jbsidis == "+str(file_path))
        with open(os.path.abspath(file_path), 'wb') as output_file:
            output_file.write(response.content)
import time
import random
def A_thousand_links_jbsidis(file_with_a_thousand_links, web_id="1234567890123", delay_seconds=10):
    """Download every link listed (one per line) in a text file.

    Args:
        file_with_a_thousand_links: Path of the text file with the links.
        web_id: Cookie value handed to the downloader; replace the default
            with the id that works for you.
        delay_seconds: Pause after each download.

    Blank lines are skipped. Each video is saved under an automatically
    generated name made of a running number and the current date and time.
    """
    with open(file_with_a_thousand_links) as links_file:
        raw_lines = links_file.read().splitlines()  # one link per line
    # Remove any whitespace inside a line and drop lines that end up empty.
    urls = [u for u in ("".join(line.split()) for line in raw_lines) if u]
    for c, clean_url in enumerate(urls, start=1):
        new_auto_file_name = str(c)+" - "+str(time.strftime("_%Y%m%d_%H%M%S_"))+"_video_"+".mp4"
        downloader = TikTokDownloaderjbsidis(clean_url, web_id)
        downloader.download(new_auto_file_name)
        # Wait before the next download in case the connection is slow.
        time.sleep(delay_seconds)
# Start the batch download; the links file path is relative to the current
# working directory.
A_thousand_links_jbsidis("my_file_with_1000_links.txt")
And here is the image. I don't know why we sometimes answer questions without giving a real solution. Greetings from El Salvador.
jbsidis

Script gets stuck while sending post requests with parameters

I'm trying to get a JSON response from a webpage by issuing an HTTP POST request with the appropriate parameters. When I run the script, it gets stuck and doesn't return any result. It doesn't throw any error either. This is the site link. I chose three options from the three dropdowns in the form on that site before hitting the "Get times & tickets" button.
I've tried with:
import requests
from bs4 import BeautifulSoup
url = 'https://www.thetrainline.com/'
link = 'https://www.thetrainline.com/api/journey-search/'
payload = {
    "passengers": [{"dateOfBirth": "1991-01-31"}],
    "isEurope": False,
    "cards": [],
    "transitDefinitions": [{
        "direction": "outward",
        "origin": "1f06fc66ccd7ea92ae4b0a550e4ddfd1",
        "destination": "7c25e933fd14386745a7f49423969308",
        "journeyDate": {"type": "departAfter", "time": "2021-02-11T22:45:00"},
    }],
    "type": "single",
    "maximumJourneys": 4,
    "includeRealtime": True,
    "applyFareDiscounts": True,
}

with requests.Session() as s:
    s.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
        'content-type': 'application/json',
        'accept': 'application/json',
    })
    # Without a timeout requests waits indefinitely for the server, which
    # shows up as a script that "gets stuck". With one, a stalled request
    # raises requests.exceptions.Timeout instead.
    r = s.post(link, json=payload, timeout=30)
    print(r.status_code)
    print(r.json())
How can I get json response issuing post requests with parameters from that site?

You are missing the required headers: x-version and referer. The referer header is referring to the search form and you can build it. Before journey-search you have to post an availability request.
import requests
from requests.models import PreparedRequest
# Browser-like headers; per the answer, the site requires "x-version" and
# "referer" in addition to the usual ones.
headers = {
    'authority': 'www.thetrainline.com',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'x-version': '2.0.18186',
    'dnt': '1',
    'accept-language': 'en-GB',
    'sec-ch-ua-mobile': '?0',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/88.0.4324.96 Safari/537.36',
    'content-type': 'application/json',
    'accept': 'application/json',
    'origin': 'https://www.thetrainline.com',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'cors',
    'sec-fetch-dest': 'empty',
}

with requests.Session() as s:
    origin = "6e2242b3f38bbbd8d8124e1d84d319e1"
    destination = "15bcf02bc44ea754837c8cf14569f608"
    localDateTime = "2021-02-03T19:30:00"
    dateOfBirth = "1991-02-03"
    passenger_type = "single"

    # Build the URL of the search-results page; it is sent as the referer.
    req = PreparedRequest()
    params = {
        "origin": origin,
        "destination": destination,
        "outwardDate": localDateTime,
        "outwardDateType": "departAfter",
        "journeySearchType": passenger_type,
        "passengers[]": dateOfBirth
    }
    req.prepare_url("https://www.thetrainline.com/book/results", params)
    headers.update({"referer": req.url})
    s.headers = headers

    # Step 1: the availability request has to be posted before the search.
    payload_availability = {
        "origin": origin,
        "destination": destination,
        "outwardDefinition": {
            "localDateTime": localDateTime,
            "searchMethod": "DEPARTAFTER"
        },
        "passengerBirthDates": [{
            "id": "PASSENGER-0",
            "dateOfBirth": dateOfBirth
        }],
        "maximumNumberOfJourneys": 4,
        "discountCards": []
    }
    # Timeouts make a stalled request raise instead of hanging forever.
    r = s.post('https://www.thetrainline.com/api/coaches/availability',
               json=payload_availability, timeout=30)
    r.raise_for_status()

    # Step 2: the journey search itself. The birth date comes from the same
    # variable as above so both requests describe the same passenger.
    payload_search = {
        "passengers": [{"dateOfBirth": dateOfBirth}],
        "isEurope": False,
        "cards": [],
        "transitDefinitions": [{
            "direction": "outward",
            "origin": origin,
            "destination": destination,
            "journeyDate": {
                "type": "departAfter",
                "time": localDateTime}
        }],
        "type": passenger_type,
        "maximumJourneys": 4,
        "includeRealtime": True,
        "applyFareDiscounts": True
    }
    r = s.post('https://www.thetrainline.com/api/journey-search/',
               json=payload_search, timeout=30)
    r.raise_for_status()
    print(r.json())

As Sers's reply says, headers are missing.
When crawling websites, you have to keep anti-crawling mechanisms in mind. The website may block your requests based on your IP address, request headers, cookies, and various other factors.

Download file from POST request in scrapy

I know there is built-in middleware to handle downloads, but it only accepts a URL. In my case, the download link is a POST request.
When I make that POST request, the PDF file starts downloading.
Now I want to download that file from the POST request in Scrapy.
The website is http://scrb.bihar.gov.in/View_FIR.aspx
You can enter the district Aurangabad and the police station Kasma PS.
In the last column, "Status", there is a link for downloading the file.
# Look up the <option> values for the wanted police station and district.
# XPath selects attributes with "@"; "#" is not valid there, so the queries
# use @id and @value.
ps_x = '//*[@id="ctl00_ContentPlaceHolder1_ddlPoliceStation"]//option[.="Kasma PS"]/@value'
police_station_val = response.xpath(ps_x).extract_first()
d_x = '//*[@id="ctl00_ContentPlaceHolder1_ddlDistrict"]//option[.="Aurangabad"]/@value'
district_val = response.xpath(d_x).extract_first()

# Hidden form fields read from the current page; the XPath expressions are
# attributes of the spider (self.*_x) defined outside this snippet.
viewstate = response.xpath(self.viewstate_x).extract_first()
viewstategen = response.xpath(self.viewstategen_x).extract_first()
eventvalidator = response.xpath(self.eventvalidator_x).extract_first()
eventtarget = response.xpath(self.eventtarget_x).extract_first()
eventargs = response.xpath(self.eventargs_x).extract_first()
lastfocus = response.xpath(self.lastfocus_x).extract_first()

# Form body of the POST that triggers the file download.
payload = {
    '__EVENTTARGET': eventtarget,
    '__EVENTARGUMENT': eventargs,
    '__LASTFOCUS': lastfocus,
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstategen,
    '__EVENTVALIDATION': eventvalidator,
    'ctl00$ContentPlaceHolder1$ddlDistrict': district_val,
    'ctl00$ContentPlaceHolder1$ddlPoliceStation': police_station_val,
    'ctl00$ContentPlaceHolder1$optionsRadios': 'radioPetioner',
    'ctl00$ContentPlaceHolder1$txtSearchBy': '',
    # NOTE(review): presumably the click coordinates on the status image
    # link of one result row -- confirm against the browser request.
    'ctl00$ContentPlaceHolder1$rptItem$ctl06$lnkStatus.x': '21',
    'ctl00$ContentPlaceHolder1$rptItem$ctl06$lnkStatus.y': '24',
}
headers = {
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Origin': 'http://scrb.bihar.gov.in',
    'Upgrade-Insecure-Requests': '1',
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Referer': 'http://scrb.bihar.gov.in/View_FIR.aspx',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9',
}
# The same download done with plain requests, kept for reference:
# req = requests.post(response.url, data=payload, headers=headers)
# with open('pdf/ch.pdf', 'w+b') as f:
# f.write(req.content)

When you click download, the web browser sends a POST request.
So the answer mentioned earlier by El Ruso is applicable in your case:
.....
def parse(self, response):
    # Build `payload` and `headers` from the page here (as in the question)
    # before issuing the POST below.
    # NOTE(review): FormRequest form data should contain strings only, so
    # any field that came back as None needs a value first -- confirm.
    yield scrapy.FormRequest(
        "http://scrb.bihar.gov.in/View_FIR.aspx",
        formdata=payload,
        headers=headers,
        callback=self.save_pdf,
    )
def save_pdf(self, response):
    """Store the response body in a file named after the last URL segment."""
    file_name = response.url.rsplit('/', 1)[-1]
    self.logger.info('Saving PDF %s', file_name)
    with open(file_name, 'wb') as pdf_file:
        pdf_file.write(response.body)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Get newest video detail youtube instantaneously - python

Related

How can I add threads to this python code to make multiple request

Scraping Data from booking with python

Read URLs from external file

Script gets stuck while sending post requests with parameters

Download file from POST request in scrapy

Categories

Resources