Browse Source

Added Gfycat API support, changed certificates

* Certificates are now more appropriately named (liked_saved_server
instead of server_jupyter_based). The selection code is backwards compatible
* Added dependency on py-gfycat. The version in Pip is very
out-of-date, so I instead install it straight from Github. I added
Gfycat settings and instructions. Gfycat URLs are parsed for their
gfynames, then the MP4 URL is retrieved from Gfycat through the API
* #22: Auto-create output_dir if it doesn't already exist. Image Saver
was doing this but the parent script was not
database-forgotten
Macoy Madson 1 year ago
parent
commit
213c9a10a3
8 changed files with 81 additions and 25 deletions
  1. +1
    -1
      Generate_Certificates.sh
  2. +9
    -2
      LikedSavedDownloaderServer.py
  3. +4
    -5
      ReadMe.org
  4. +28
    -14
      imageSaver.py
  5. +5
    -0
      redditUserImageScraper.py
  6. +24
    -3
      settings.py
  7. +6
    -0
      settings.txt
  8. +4
    -0
      utilities.py

+ 1
- 1
Generate_Certificates.sh View File

@ -1,3 +1,3 @@
#!/bin/bash
mkdir certificates
openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout certificates/server_jupyter_based.crt.key -out certificates/server_jupyter_based.crt.pem
openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout certificates/liked_saved_server.crt.key -out certificates/liked_saved_server.crt.pem

+ 9
- 2
LikedSavedDownloaderServer.py View File

@ -627,8 +627,15 @@ if __name__ == '__main__':
# I want a free certificate for this non-serious project)
useSSL = True
if useSSL:
app.listen(port, ssl_options={"certfile":"certificates/server_jupyter_based.crt.pem",
"keyfile":"certificates/server_jupyter_based.crt.key"})
if os.path.exists("certificates/liked_saved_server.crt.pem"):
app.listen(port, ssl_options={"certfile":"certificates/liked_saved_server.crt.pem",
"keyfile":"certificates/liked_saved_server.crt.key"})
# For backwards compatibility
elif os.path.exists("certificates/server_jupyter_based.crt.pem"):
app.listen(port, ssl_options={"certfile":"certificates/server_jupyter_based.crt.pem",
"keyfile":"certificates/server_jupyter_based.crt.key"})
else:
print('\n\tERROR: Certificates non-existent! Run ./Generate_Certificates.sh to create them')
else:
# Show the warning only if SSL is not enabled
print('\n\tWARNING: Do NOT run this server on the internet (e.g. port-forwarded)'


+ 4
- 5
ReadMe.org View File

@ -15,7 +15,7 @@ Use this awesome Python 2 or 3 script to download
** Directions
*** 1. You have to download this repository (obviously)
*** 1. Clone this repository
#+BEGIN_SRC sh
git clone https://github.com/makuto/redditLikedSavedImageDownloader
@ -26,7 +26,7 @@ git clone https://github.com/makuto/redditLikedSavedImageDownloader
The following dependencies are required:
#+BEGIN_SRC sh
pip install praw pytumblr ImgurPython jsonpickle tornado
pip install praw pytumblr ImgurPython jsonpickle tornado git+https://github.com/ankeshanand/py-gfycat@master
#+END_SRC
You'll want to use Python 3, which for your environment may require you to specify ~pip3~ instead of just ~pip~.
@ -43,11 +43,10 @@ pip install passlib bcrypt argon2_cffi
#+BEGIN_SRC sh
cd redditLikedSavedImageDownloader/
mkdir certificates
openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout certificates/server_jupyter_based.crt.key -out certificates/server_jupyter_based.crt.pem
./Generate_Certificates.sh
#+END_SRC
This step is only required if you want to use SSL, which ensures you have an encrypted connection to the server. You can disable this by opening ~LikedSavedDownloaderServer.py~ and setting ~useSSL=False~.
This step is only required if you want to use SSL, which ensures you have an encrypted connection to the server. You can disable this by opening ~LikedSavedDownloaderServer.py~ and setting ~useSSL = False~.
*** 4. Run the server


+ 28
- 14
imageSaver.py View File

@ -1,13 +1,18 @@
# -*- coding: utf-8 -*-
import sys
import imgurpython as imgur
import logger
import os
import random
import re
import settings
import sys
import utilities
from builtins import str
from crcUtils import signedCrc32
from gfycat.client import GfycatClient
from operator import attrgetter
import imgurpython as imgur
from builtins import str
import logger
import urllib
if sys.version_info[0] >= 3:
@ -135,23 +140,36 @@ def isGfycatUrl(url):
and '.webm' not in url.lower()
and '.gif' not in url.lower()[-4:])
# Lazy initialize in case it's not needed
gfycatClient = None
# Special handling for Gfycat links
# Returns a URL to a webm which can be downloaded by urllib
def convertGfycatUrlToWebM(url):
global gfycatClient
# Change this:
# https://gfycat.com/IndolentScalyIncatern
# https://gfycat.com/IndolentScalyIncatern/
# Into this:
# https://zippy.gfycat.com/IndolentScalyIncatern.webm
# Or maybe this:
# https://giant.gfycat.com/IndolentScalyIncatern.webm
# Look for this key in the HTML document and get whatever src is
# GFYCAT_SOURCE_KEY = '<source id="webmSource" src='
# return findSourceFromHTML(url, GFYCAT_SOURCE_KEY)
# Lazy initialize client
if not gfycatClient and settings.settings['Gfycat_Client_id']:
gfycatClient = GfycatClient(settings.settings['Gfycat_Client_id'],settings.settings['Gfycat_Client_secret'])
# Temporary solution while Gfycat API isn't set up
return "https://giant.gfycat.com/{}.webm".format(url[url.rfind("/") + 1:])
# Still don't have a client?
if not gfycatClient:
# Hacky solution while Gfycat API isn't set up. This breaks if case is wrong
return "https://giant.gfycat.com/{}.webm".format(url[url.rfind("/") + 1:])
else:
# Get the gfyname from the url
matches = re.findall(r'gfycat\.com/([a-zA-Z]+)', url)
if not matches:
logger.log("Gfycat URL {} doesn't seem to match expected URL format".format(url))
else:
gfycatUrlInfo = gfycatClient.query_gfy(matches[0])
return gfycatUrlInfo['gfyItem']['mp4Url']
def isGifVUrl(url):
return getFileTypeFromUrl(url) == 'gifv'
@ -196,10 +214,6 @@ def isImgurAlbumUrl(url):
and not getFileTypeFromUrl(url)
and '/a/' in url)
def makeDirIfNonexistant(directory):
if not os.path.exists(directory):
os.makedirs(directory)
# Make sure the filename is alphanumeric or has supported symbols, and is shorter than 45 characters
def safeFileName(filename, file_path = False):
acceptableChars = ['_', ' ']


+ 5
- 0
redditUserImageScraper.py View File

@ -32,8 +32,13 @@ def runLikedSavedDownloader(pipeConnection):
logger.log('No Imgur Client ID and/or Imgur Client Secret was provided, or album download is not'
' enabled. This is required to download imgur albums. They will be ignored. Check'
' settings.txt for how to fill in these values.')
if not settings.settings['Gfycat_Client_id']:
logger.log('No Gfycat Client ID and/or Gfycat Client Secret was provided. This is required'
' to download Gfycat media reliably.')
logger.log('Output: ' + settings.settings['Output_dir'])
makeDirIfNonexistant(settings.settings['Output_dir'])
# TODO: Only save one post for early out. Only save once all downloading is done
redditRequestOnlyNewSavedCache = None


+ 24
- 3
settings.py View File

@ -26,6 +26,12 @@ settings = {
'Tumblr_Client_secret' : '',
'Tumblr_Client_token' : '',
'Tumblr_Client_token_secret' : '',
# Gfycat authentication information
# https://developers.gfycat.com/signup/#/apiform
# Requires https://github.com/ankeshanand/py-gfycat
'Gfycat_Client_id' : '',
'Gfycat_Client_secret' : '',
# Disable downloading albums by default.
'Should_download_albums' : False,
@ -123,13 +129,26 @@ settingsStructure = [
('Reddit_Unlike_Liked', 'Unlike/remove upvote after the submission has been recorded'),
('Reddit_Unsave_Saved', 'Unsave submission after it has been recorded'),
('Reddit_Try_Request_Only_New',
"Attempt to only request and download new submissions (those which haven't been downloaded) This uses the Reddit cache files to know what's already been downloaded, so it will only work if you've successfully run the script before"),
"Attempt to only request and download new submissions (those which haven't been downloaded) "
"This uses the Reddit cache files to know what's already been downloaded, so it will only "
"work if you've successfully run the script before"),
'Reddit_Try_Request_Only_New_Saved_Cache_File',
'Reddit_Try_Request_Only_New_Liked_Cache_File']),
('Imgur Auth',
['Imgur_client_id',
('Imgur_client_secret',"These need to be filled in so that the script can download Imgur albums. If not filled in, imgur albums will be ignored. Single images will still be downloaded. I've filled in a public imgur user's information. This isn't safe giving you hooligans the secret, but fuck it. If you want to use your own Imgur Client, sign in to Imgur, then go <a href=\"https://api.imgur.com/oauth2/addclient\">here</a> and create your new client.")]),
('Imgur_client_secret',"These need to be filled in so that the script can download Imgur "
"albums. If not filled in, imgur albums will be ignored. Single images will still be "
"downloaded. I've filled in a public imgur user's information. This isn't safe giving you"
" hooligans the secret, but fuck it. If you want to use your own Imgur Client, sign in to"
" Imgur, then go <a href=\"https://api.imgur.com/oauth2/addclient\">here</a> and create"
" your new client.")]),
('Gfycat Auth',
['Gfycat_Client_id',
('Gfycat_Client_secret', "These need to be filled in so that the script can download Gfycat"
" media. If not filled in, many Gfycat links will fail to download."
" Go <a href=\"https://developers.gfycat.com/signup/#/apiform\">here</a> to get your API keys.")]),
('Tumblr Auth',
['Tumblr_Client_id',
@ -140,7 +159,9 @@ settingsStructure = [
('Tumblr Settings',
[('Tumblr_Total_requests', requestsInstructions),
('Tumblr_Try_Request_Only_New',
"Attempt to only request and download new submissions (those which haven't been downloaded) This uses the Reddit cache files to know what's already been downloaded, so it will only work if you've successfully run the script before"),
"Attempt to only request and download new submissions (those which haven't been downloaded) "
"This uses the Reddit cache files to know what's already been downloaded, so it will only "
"work if you've successfully run the script before"),
'Tumblr_Try_Request_Only_New_Cache_File']),
('Download Settings',


+ 6
- 0
settings.txt View File

@ -98,6 +98,12 @@ Should_download_albums=True
# If true, do not download single images, only submissions which are imgur albums
Only_download_albums=False
#========================================
# Gfycat Settings
#========================================
Gfycat_Client_id=
Gfycat_Client_secret=
#========================================
# Tumblr Settings
#========================================


+ 4
- 0
utilities.py View File

@ -17,3 +17,7 @@ def sort_naturally(l):
""" Sort the given list in the way that humans expect.
"""
l.sort(key=alphanum_key)
def makeDirIfNonexistant(directory):
if not os.path.exists(directory):
os.makedirs(directory)

Loading…
Cancel
Save