Get newest video detail youtube instantaneously - python

I created a script that checks for the newest video on a channel and comments on it as soon as the video is found. The problem is that the script sometimes detects the video 4 minutes after it is uploaded and sometimes 30 seconds after it is uploaded. I want to detect the video as soon as it is uploaded. The code is as follows:
import time, requests, os
from Google import Create_Service
from subprocess import Popen
# Settings passed to Create_Service to build the YouTube API client.
SECRET_FILE = 'client.json'
APINAME = 'youtube'
APIVERSION = 'v3'
SCOPE = ["https://www.googleapis.com/auth/youtube.force-ssl"]
service_y = Create_Service(SECRET_FILE, APINAME, APIVERSION, SCOPE)

# NOTE(review): the path uses /user/ followed by what looks like a channel id
# (the same id is passed as channelId below) - confirm this URL resolves.
channel = "https://www.youtube.com/user/UCBqIaQItMBrQjK6NOXB1eQQ"

# Browser-like headers sent with every poll of the channel's videos page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
}
comment_text = "First comment. Congrats on 100 Mil Subs - I remember watching you count to 1 million."

# "sds" is a sentinel meaning "no snapshot of the page has been taken yet".
vid = "sds"

while True:
    try:
        res = requests.get(channel + "/videos", headers=headers)
    except requests.RequestException:
        # The poll failed: write a small relauncher that waits 2 seconds and
        # starts this script again, run it, and end the current process.
        # The file is written inside `with` so it is flushed and closed
        # before the child process reads it.
        with open("script2.py", "w") as script2:
            script2.writelines([
                "from subprocess import Popen\n",
                "import time,os\n",
                "time.sleep(2)\n",
                f"Popen(['python', {os.path.basename(__file__)!r}])\n",
                "exit(0)",
            ])
        # A list of arguments works on every platform; a single command
        # string without shell=True is only split into arguments on Windows.
        Popen(["python", "script2.py"])
        exit(0)

    # Text between the first "publishedTimeText" marker and the next "}".
    # NOTE(review): presumably this is the newest video's published-time
    # text; if it is a relative time it can change without a new upload -
    # confirm against the page markup.
    html = res.text
    html = html.split("publishedTimeText")[1].split("}")[0]
    print(html)
    time.sleep(1)
    print(vid)

    if vid != html:
        if vid == "sds":
            # First pass: only remember the current snapshot.
            vid = html
        else:
            # The snapshot changed: look up the newest video id through the
            # API, post the comment on it and stop polling.
            request = service_y.search().list(part="id", channelId="UCBqIaQItMBrQjK6NOXB1eQQ", order="date", maxResults="1")
            response = request.execute()
            vid = response["items"][0]["id"]["videoId"]
            request = service_y.commentThreads().insert(
                part="snippet",
                body={
                    "snippet": {
                        "videoId": vid,
                        "topLevelComment": {
                            "snippet": {
                                "textOriginal": comment_text
                            }
                        }
                    }
                }
            )
            response = request.execute()
            break
Thanks

Related

How can I add threads to this python code to make multiple request

I am creating a custom tool to brute-force a login form on a web application for bug bounty hunting. I came across a bug in one web application for which I had to write my own brute-force tool. This is not the complete tool, but I need a way to add threads to the current code:
import requests
import re
# Candidate passwords, one per line. The file is closed as soon as it is read.
with open('password.txt', 'r') as password_file:
    exploit = password_file.readlines()

headers = {
    'Host': 'TARGET.COM',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'close',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'iframe',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1'
}

# The query-string parameters are the same for every attempt, so they are
# built once instead of on every loop iteration.
params = {
    'execution': '111u9342',
    'client_id': 'client-23429df',
    'tab_id': '234324',
}

for line in exploit:
    password = line.strip()
    # NOTE(review): `myname` and `proxies` are not defined in this snippet;
    # they must be defined before this loop runs.
    http = requests.post('https://www.target.com/test',
                         params=params,
                         headers=headers,
                         data={'username': myname, 'password': password},
                         verify=False,
                         proxies=proxies)
    content = http.content
print("finished")
I am beginner in python
You can use ThreadPoolExecutor:
from concurrent.futures import ThreadPoolExecutor
import requests
# ....Other code parts...
def base_post(url, header, data, proxies, timeout=10, params=None, verify=True):
    """Send one POST request and return the response.

    Args:
        url: Address to post to.
        header: Request headers.
        data: Form fields for the request body.
        proxies: Proxy mapping handed to ``requests``.
        timeout: Seconds to wait before giving up on the request.
        params: Optional query-string parameters.
        verify: Whether to verify the server's TLS certificate.
    """
    response = requests.post(url, headers=header, params=params, data=data,
                             proxies=proxies, timeout=timeout, verify=verify)
    return response


# The query-string parameters are the same for every request.
params = {
    'execution': '111u9342',
    'client_id': 'client-23429df',
    'tab_id': '234324',
}

# One keyword-argument dict per password. The keys must match the parameter
# names of base_post because each dict is unpacked with ** below.
# `exploit`, `headers`, `myname` and `proxies` come from the code parts that
# are left out above.
total_possibilities = [
    {
        'url': "...",
        'header': headers,
        'data': {'username': myname, 'password': line.strip()},
        'proxies': proxies,
        'params': params,
        'verify': False,
    }
    for line in exploit
]

results = []
with ThreadPoolExecutor(max_workers=3) as executor:
    for row in total_possibilities:
        results.append(executor.submit(base_post, **row))
# Leaving the `with` block waits for every submitted request to finish.
# `results` holds Future objects; call .result() on one to get its response
# (or to re-raise the exception that request failed with).
print(results)
Don't forget to update "max_workers" based on your needs.

Scraping Data from booking with python

hope you're doing well !
I'm trying to scrape data from Booking (hotel name, room, ...). The code runs, but I don't get the data in the Excel file: the data file is empty!
This is my code :
# Build the Extractor from the rules stored in the YAML file
e = Extractor.from_yaml_file('C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/booking.yml')


def scrape(url):
    """Fetch *url* and return what the module-level extractor ``e`` extracts from the page text."""
    request_headers = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
        # Swap in a different user agent if the site starts blocking requests
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Referer': 'https://www.booking.com/index.en-gb.html',
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
    }

    # Fetch the page with requests
    print("Downloading %s" % url)
    page = requests.get(url, headers=request_headers)

    # Hand the page text to the extractor; base_url is the page's own URL
    page_text = page.text
    return e.extract(page_text, base_url=url)
# Read one URL per line, scrape each page and write every extracted hotel as
# a CSV row. newline='' is what the csv module requires for files it writes;
# without it, extra blank rows appear on platforms that translate "\n".
with open("C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/urls.txt", 'r') as urllist, open('C:/Users/pc/OneDrive/Bureau/booking-hotel-scraper-master/data.csv', 'w', newline='') as outfile:
    fieldnames = [
        "name",
        "location",
        "price",
        "price_for",
        "room_type",
        "beds",
        "rating",
        "rating_title",
        "number_of_ratings",
        "url"
    ]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
    writer.writeheader()
    for line in urllist:
        # Drop the trailing newline so it is not sent as part of the URL,
        # and skip empty lines instead of requesting an empty URL.
        url = line.strip()
        if not url:
            continue
        data = scrape(url)
        if data and data['hotels'] is not None:
            writer.writerows(data["hotels"])
And this is the result in the excel file :
There is no error in my code; the question is only about how to get this data.
The booking.yml :

Read URLs from external file

I found the following TikTok Downloader which is working fine.
from argparse import ArgumentParser
import os
from urllib.parse import parse_qsl, urlparse
import requests
class TikTokDownloader:
    """Download one video, given its page URL and a ``tt_webid`` cookie value."""

    # Browser-like headers sent with both the page request and the video request.
    HEADERS = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'DNT': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Dest': 'video',
        'Referer': 'https://www.tiktok.com/',
        'Accept-Language': 'en-US,en;q=0.9,bs;q=0.8,sr;q=0.7,hr;q=0.6',
        'sec-gpc': '1',
        'Range': 'bytes=0-',
    }

    def __init__(self, url: str, web_id: str):
        """Remember the page URL and build the cookies sent with each request.

        Args:
            url: Address of the video page.
            web_id: Value used for both the ``tt_webid`` and ``tt_webid_v2`` cookies.
        """
        self.__url = url
        self.__cookies = {
            'tt_webid': web_id,
            'tt_webid_v2': web_id
        }

    def __get_video_url(self) -> str:
        """Fetch the video page and return the URL stored in its ``playAddr`` field.

        Raises:
            IndexError: If the page text contains no ``"playAddr":"`` marker.
        """
        response = requests.get(self.__url, cookies=self.__cookies, headers=TikTokDownloader.HEADERS)
        # The address is embedded in the page text; "&" arrives escaped as \u0026.
        return response.text.split('"playAddr":"')[1].split('"')[0].replace(r'\u0026', '&')

    def download(self, file_path: str):
        """Download the video and write it to *file_path*.

        If *file_path* already exists the user is asked whether to overwrite
        it; any answer other than "y"/"Y" returns without downloading.

        Raises:
            requests.HTTPError: If the video request returns an error status.
        """
        # Ask before any network work so a declined overwrite does not cost
        # a full video download.
        if os.path.exists(file_path):
            choice = input('File already exists. Overwrite? (Y/N): ')
            if choice.lower() != 'y':
                return

        video_url = self.__get_video_url()
        url = urlparse(video_url)
        # Split the query string off so it is passed as separate parameters.
        params = tuple(parse_qsl(url.query))
        request = requests.Request(method='GET',
                                   url='{}://{}{}'.format(url.scheme,
                                                          url.netloc, url.path),
                                   cookies=self.__cookies,
                                   headers=TikTokDownloader.HEADERS,
                                   params=params)
        prepared_request = request.prepare()
        # The session is used as a context manager so its connections are
        # released once the response has been received.
        with requests.Session() as session:
            response = session.send(request=prepared_request)
        response.raise_for_status()

        with open(os.path.abspath(file_path), 'wb') as output_file:
            output_file.write(response.content)
if __name__ == "__main__":
    # Command-line entry point: read the cookie value, the output path and
    # the video URL from the arguments, then download that one video.
    parser = ArgumentParser()
    parser.add_argument('--web-id', help='Value of tt_webid or tt_webid_v2 cookie (they are the same).')
    parser.add_argument('-o', '--output', default='download.mp4', help='Full output path.')
    # The example URL uses "@username"; the "@" had been garbled to "#".
    parser.add_argument('url', help='Video url (https://www.tiktok.com/@username/video/1234567890123456789 or https://vm.tiktok.com/a1b2c3/).')
    args = parser.parse_args()

    downloader = TikTokDownloader(args.url, args.web_id)
    downloader.download(args.output)
The issue is that I have to run this command to download each video:
python3 ./tiktok.py --web-id 1234567890123 -o ./file.mp4 https://vm.tiktok.com/...
And I have 1000 links to download. All the links are in A txt file without comma. Like:
Https://tiktok.com/1
Https://tiktok.com/2
Https://tiktok.com/3
So- I'm looking to find a way to read the text file and automatically replace the link in the command that I have to run. Or should I change the actual script?
Please use my code. I have defined a function that downloads all of those videos: you only need to pass it the path of the file that contains the thousand links. Preferably, save this Python script in the same directory as the file with the thousand links:
Use the function
A_thousand_links_jbsidis("my_file_with_1000_links.txt")
This automatically names each video based on the date and time. I tested it and it works!
Here is the code by jbsidis:
from argparse import ArgumentParser
import os
from urllib.parse import parse_qsl, urlparse
import requests
class TikTokDownloaderjbsidis:
    """Download one video without prompting; an existing output file is overwritten."""

    # Browser-like headers sent with both the page request and the video request.
    HEADERS = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'DNT': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Dest': 'video',
        'Referer': 'https://www.tiktok.com/',
        'Accept-Language': 'en-US,en;q=0.9,bs;q=0.8,sr;q=0.7,hr;q=0.6',
        'sec-gpc': '1',
        'Range': 'bytes=0-',
    }

    def __init__(self, url, web_id):
        """Remember the page URL and build the cookies sent with each request.

        Args:
            url: Address of the video page.
            web_id: Value used for both the ``tt_webid`` and ``tt_webid_v2`` cookies.
        """
        self.__url = url
        self.__cookies = {
            'tt_webid': web_id,
            'tt_webid_v2': web_id
        }

    def __get_video_url(self) -> str:
        """Fetch the video page and return the URL stored in its ``playAddr`` field.

        Raises:
            IndexError: If the page text contains no ``"playAddr":"`` marker.
        """
        response = requests.get(self.__url, cookies=self.__cookies, headers=TikTokDownloaderjbsidis.HEADERS)
        # The address is embedded in the page text; "&" arrives escaped as \u0026.
        return response.text.split('"playAddr":"')[1].split('"')[0].replace(r'\u0026', '&')

    def download(self, file_path: str):
        """Download the video and write it to *file_path*, replacing any existing file.

        Raises:
            requests.HTTPError: If the video request returns an error status.
        """
        video_url = self.__get_video_url()
        url = urlparse(video_url)
        # Split the query string off so it is passed as separate parameters.
        params = tuple(parse_qsl(url.query))
        request = requests.Request(method='GET',
                                   url='{}://{}{}'.format(url.scheme,
                                                          url.netloc, url.path),
                                   cookies=self.__cookies,
                                   headers=TikTokDownloaderjbsidis.HEADERS,
                                   params=params)
        prepared_request = request.prepare()
        # The session is used as a context manager so its connections are
        # released once the response has been received.
        with requests.Session() as session:
            response = session.send(request=prepared_request)
        response.raise_for_status()

        # No overwrite prompt here: this class is meant for unattended batch
        # runs, so an existing file is simply replaced.
        print("Downloading jbsidis == "+str(file_path))
        with open(os.path.abspath(file_path), 'wb') as output_file:
            output_file.write(response.content)
import time
import random
def A_thousand_links_jbsidis(file_with_a_thousand_links):
    """Download every link listed in a text file, one link per line.

    Each video is saved under an automatic name made of a running counter
    and the current date and time. Lines that are empty after whitespace
    removal are skipped.

    Args:
        file_with_a_thousand_links: Path of the text file holding the links.
    """
    MyWebIDis = "1234567890123"  # put the id that works for you

    # Read all lines up front; `with` closes the file straight away.
    with open(file_with_a_thousand_links) as links_file:
        m = links_file.read().splitlines()  # guessing the links are per line

    c = 0
    for new_url in m:
        clean_url = str(new_url).replace("\n", "").replace("\r", "").replace(" ", "")
        if not clean_url:
            # A blank line would otherwise trigger a request for an empty URL.
            continue
        c += 1
        new_auto_file_name = f"{c} - {time.strftime('_%Y%m%d_%H%M%S_')}_video_.mp4"  # i guess they are mp4
        downloader = TikTokDownloaderjbsidis(clean_url, MyWebIDis)
        downloader.download(new_auto_file_name)
        time.sleep(10)  # just in case the internet is not that fast, wait 10 seconds before the next download
# Start the batch download for the links file as soon as this script runs.
A_thousand_links_jbsidis("my_file_with_1000_links.txt")
And here is the image, I don't know why sometimes we answer questions without giving a real solution, greetings from El Salvador.
jbsidis

Script gets stuck while sending post requests with parameters

I'm trying to get a JSON response by issuing a POST HTTP request with the appropriate parameters to a webpage. When I run the script, it gets stuck and doesn't return any result. It doesn't throw any error either. This is the site link. I chose three options from the three dropdowns in the form on that site before hitting the Get times & tickets button.
I've tried with:
import requests
from bs4 import BeautifulSoup
# Site home page (not used below) and the endpoint the search is posted to.
url = 'https://www.thetrainline.com/'
link = 'https://www.thetrainline.com/api/journey-search/'

# JSON body of the search request.
payload = {
    "passengers": [{"dateOfBirth": "1991-01-31"}],
    "isEurope": False,
    "cards": [],
    "transitDefinitions": [
        {
            "direction": "outward",
            "origin": "1f06fc66ccd7ea92ae4b0a550e4ddfd1",
            "destination": "7c25e933fd14386745a7f49423969308",
            "journeyDate": {"type": "departAfter", "time": "2021-02-11T22:45:00"},
        }
    ],
    "type": "single",
    "maximumJourneys": 4,
    "includeRealtime": True,
    "applyFareDiscounts": True,
}

with requests.Session() as s:
    # Set the three session headers in a single call.
    s.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
        'content-type': 'application/json',
        'accept': 'application/json',
    })
    r = s.post(link, json=payload)
    print(r.status_code)
    print(r.json())
How can I get json response issuing post requests with parameters from that site?
You are missing the required headers: x-version and referer. The referer header is referring to the search form and you can build it. Before journey-search you have to post an availability request.
import requests
from requests.models import PreparedRequest
# Headers sent with every request; a "referer" entry is added further down.
headers = {
    'authority': 'www.thetrainline.com',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'x-version': '2.0.18186',
    'dnt': '1',
    'accept-language': 'en-GB',
    'sec-ch-ua-mobile': '?0',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/88.0.4324.96 Safari/537.36',
    'content-type': 'application/json',
    'accept': 'application/json',
    'origin': 'https://www.thetrainline.com',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'cors',
    'sec-fetch-dest': 'empty',
}

with requests.Session() as s:
    origin = "6e2242b3f38bbbd8d8124e1d84d319e1"
    destination = "15bcf02bc44ea754837c8cf14569f608"
    localDateTime = "2021-02-03T19:30:00"
    dateOfBirth = "1991-02-03"
    passenger_type = "single"

    # Build the results-page URL with its query string; it is only used as
    # the value of the "referer" header.
    req = PreparedRequest()
    params = {
        "origin": origin,
        "destination": destination,
        "outwardDate": localDateTime,
        "outwardDateType": "departAfter",
        "journeySearchType": passenger_type,
        "passengers[]": dateOfBirth
    }
    req.prepare_url("https://www.thetrainline.com/book/results", params)
    headers.update({"referer": req.url})
    s.headers = headers

    # First request: availability. It is posted before the journey search.
    payload_availability = {
        "origin": origin,
        "destination": destination,
        "outwardDefinition": {
            "localDateTime": localDateTime,
            "searchMethod": "DEPARTAFTER"
        },
        "passengerBirthDates": [{
            "id": "PASSENGER-0",
            "dateOfBirth": dateOfBirth
        }],
        "maximumNumberOfJourneys": 4,
        "discountCards": []
    }
    r = s.post('https://www.thetrainline.com/api/coaches/availability', json=payload_availability)
    r.raise_for_status()

    # Second request: the journey search itself. The date of birth reuses the
    # variable above so both requests always describe the same passenger.
    payload_search = {
        "passengers": [{"dateOfBirth": dateOfBirth}],
        "isEurope": False,
        "cards": [],
        "transitDefinitions": [{
            "direction": "outward",
            "origin": origin,
            "destination": destination,
            "journeyDate": {
                "type": "departAfter",
                "time": localDateTime}
        }],
        "type": passenger_type,
        "maximumJourneys": 4,
        "includeRealtime": True,
        "applyFareDiscounts": True
    }
    r = s.post('https://www.thetrainline.com/api/journey-search/', json=payload_search)
    r.raise_for_status()
    print(r.json())
As Sers's reply says, headers are missing.
When crawling websites, you have to keep anti-crawling mechanisms in mind. The website may block your requests based on your IP address, request headers, cookies, and various other factors.

Download file from POST request in scrapy

I know there is built-in middleware to handle downloads, but it only accepts a URL. In my case, the download link is a POST request.
When I make that POST request, a PDF file starts downloading.
Now I want to download that file from the POST request in Scrapy.
Website is http://scrb.bihar.gov.in/View_FIR.aspx
You can enter district Aurangabad and police station Kasma PS
On last column status there is a link to downloading file.
# Look up the <option> value for the chosen police station and district.
# XPath attributes are written with "@"; the "#" that stood here is not
# valid XPath.
ps_x = '//*[@id="ctl00_ContentPlaceHolder1_ddlPoliceStation"]//option[.="Kasma PS"]/@value'
police_station_val = response.xpath(ps_x).extract_first()
d_x = '//*[@id="ctl00_ContentPlaceHolder1_ddlDistrict"]//option[.="Aurangabad"]/@value'
district_val = response.xpath(d_x).extract_first()

# Read the remaining form values with XPath expressions stored on the spider.
# extract_first() returns None when an expression matches nothing.
viewstate = response.xpath(self.viewstate_x).extract_first()
viewstategen = response.xpath(self.viewstategen_x).extract_first()
eventvalidator = response.xpath(self.eventvalidator_x).extract_first()
eventtarget = response.xpath(self.eventtarget_x).extract_first()
eventargs = response.xpath(self.eventargs_x).extract_first()
lastfocus = response.xpath(self.lastfocus_x).extract_first()
# Form fields for the POST request: the values read from the page above plus
# the selected district and police station.
payload = {
'__EVENTTARGET': eventtarget,
'__EVENTARGUMENT': eventargs,
'__LASTFOCUS': lastfocus,
'__VIEWSTATE': viewstate,
'__VIEWSTATEGENERATOR': viewstategen,
'__EVENTVALIDATION': eventvalidator,
'ctl00$ContentPlaceHolder1$ddlDistrict': district_val,
'ctl00$ContentPlaceHolder1$ddlPoliceStation': police_station_val,
'ctl00$ContentPlaceHolder1$optionsRadios': 'radioPetioner',
'ctl00$ContentPlaceHolder1$txtSearchBy': '',
# NOTE(review): presumably the click coordinates of the status link in one
# result row (ctl06) - confirm against the request the browser sends.
'ctl00$ContentPlaceHolder1$rptItem$ctl06$lnkStatus.x': '21',
'ctl00$ContentPlaceHolder1$rptItem$ctl06$lnkStatus.y': '24',
}
# Browser-like request headers for the form post.
headers = {
'Connection': 'keep-alive',
'Cache-Control': 'max-age=0',
'Origin': 'http://scrb.bihar.gov.in',
'Upgrade-Insecure-Requests': '1',
'Content-Type': 'application/x-www-form-urlencoded',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Referer': 'http://scrb.bihar.gov.in/View_FIR.aspx',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-US,en;q=0.9',
}
# Disabled alternative that posts with requests and writes the reply to a
# file; kept to show what the Scrapy version should do.
# req = requests.post(response.url, data=payload, headers=headers)
# with open('pdf/ch.pdf', 'w+b') as f:
# f.write(req.content)
When you click download, the web browser sends a POST request.
So the answer mentioned by El Ruso earlier is applicable in your case:
.....
def parse(self, response):
......
yield scrapy.FormRequest("http://scrb.bihar.gov.in/View_FIR.aspx",.#your post request configuration, callback=self.save_pdf)
def save_pdf(self, response):
    """Write the response body to a file named after the last segment of the response URL."""
    # Everything after the final "/" of the URL is used as the file name.
    filename = response.url.rsplit('/', 1)[-1]
    self.logger.info('Saving PDF %s', filename)
    with open(filename, 'wb') as pdf_file:
        pdf_file.write(response.body)

Categories

Resources