Download and browse content you love!
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

420 lines
17 KiB

# -*- coding: utf-8 -*-
import os
import time
import operator
# Name of the user-editable settings file. Open that file to change the script
# settings. DO NOT change the settings below.
DEFAULT_SETTINGS_FILENAME = 'settings.txt'

# Built-in default settings. Note that these are overridden by whatever the
# settings file provides. The Python type of each default (bool, int or str)
# is what readSettings() uses to decide how to parse the value from the file.
settings = {
    # ---- Reddit ----
    # Authentication information
    'Username': '',
    'Password': '',
    'Client_id': '',
    'Client_secret': '',

    'Reddit_Enabled': True,
    'Reddit_Save_Liked': True,
    'Reddit_Save_Saved': True,
    'Reddit_Save_Comments': True,
    'Reddit_Unlike_Liked': False,
    'Reddit_Unsave_Saved': False,
    'Reddit_Save_Your_User_Posts': True,
    # Total requests to reddit (actual results may vary)
    'Reddit_Total_requests': 500,

    # ---- Imgur ----
    # Authentication information
    'Imgur_Enabled': True,
    'Imgur_client_id': '',
    'Imgur_client_secret': '',
    # Albums are not downloaded by default
    'Should_download_albums': False,
    # If true, skip single images and only download submissions which are
    # imgur albums
    'Only_download_albums': False,

    # ---- Tumblr ----
    # Authentication information
    'Tumblr_Enabled': True,
    'Tumblr_Client_id': '',
    'Tumblr_Client_secret': '',
    'Tumblr_Client_token': '',
    'Tumblr_Client_token_secret': '',
    # Total requests to Tumblr
    'Tumblr_Total_requests': 500,

    # ---- Gfycat ----
    # Authentication information
    # https://developers.gfycat.com/signup/#/apiform
    # Requires https://github.com/ankeshanand/py-gfycat
    'Gfycat_Client_id': '',
    'Gfycat_Client_secret': '',

    # ---- Pixiv ----
    'Pixiv_Enabled': True,
    'Pixiv_username': '',
    'Pixiv_password': '',

    # ---- Pinterest ----
    'Pinterest_Enabled': True,
    'Pinterest_email': '',
    'Pinterest_username': '',
    'Pinterest_password': '',
    'Pinterest_Try_Request_Only_New': True,

    # ---- Youtube DL settings ----
    'Should_download_videos': True,
    'Should_download_youtube_videos': True,
    'Only_download_videos': False,

    # ---- Submission caches ----
    # Don't fetch anything new, just reuse the cache files from the last run
    'Use_cached_submissions': False,
    'Reddit_cache_file': 'Reddit_SubmissionCache.bin',
    'Tumblr_cache_file': 'Tumblr_SubmissionCache.bin',
    'Pixiv_cache_file': 'Pixiv_SubmissionCache.bin',

    # Attempt to only request and download new submissions (those which
    # haven't been downloaded). This relies on the cache files to know what
    # has already been downloaded, so it only works after the script has been
    # run successfully at least once.
    'Reddit_Try_Request_Only_New': True,
    'Tumblr_Try_Request_Only_New': True,
    'Pixiv_Try_Request_Only_New': True,
    'Reddit_Try_Request_Only_New_Saved_Cache_File': 'Reddit_RequestOnlyNewSaved.bin',
    'Reddit_Try_Request_Only_New_Liked_Cache_File': 'Reddit_RequestOnlyNewLiked.bin',
    'Tumblr_Try_Request_Only_New_Cache_File': 'Tumblr_RequestOnlyNew.bin',
    'Pixiv_Try_Request_Only_New_Cache_File': 'Pixiv_RequestOnlyNew.bin',
    'Pixiv_Try_Request_Only_New_Private_Cache_File': 'Pixiv_RequestOnlyNewPrivate.bin',
    'Pinterest_Try_Request_Only_New_Cache_File': 'Pinterest_RequestOnlyNew.bin',

    # If the script failed at, say, 70%, you could toggle
    # Use_cached_submissions and set this value to 69. The script would then
    # restart 69% of the way into the cached submissions, nearer to where you
    # left off. This isn't the default because changes to the script may make
    # previously failing submissions download successfully, so submissions
    # are always re-checked.
    'Skip_n_percent_submissions': 0,

    # ---- Output and debugging ----
    # If True, don't actually download the images - just pretend to
    'Should_soft_retrieve': False,
    'Only_important_messages': False,
    'Output_dir': 'output',
    'Metadata_output_dir': 'metadata',
    'Database': 'LikedSaved.db',

    # These are gross: for existing output directories, they record whether
    # the user has updated their database from the JSON files with the new
    # features. These will automatically set themselves.
    'Database_Has_Imported_Unsupported_Submissions': False,
    'Database_Has_Imported_All_Submissions': False,
    'Database_Has_Imported_Comments': False,

    # ---- Server ----
    'Port': 8888,
    'Launch_Browser_On_Startup': True,
}
# HTML help text attached to the 'Client_secret' option in settingsStructure.
# The fragment closes a </p> after its first sentence and leaves a
# <p class="optionComment"> open at the end.
# NOTE(review): presumably the web interface wraps each option comment in its
# own <p class="optionComment">...</p> pair, which would balance the markup -
# confirm against the page template before editing.
redditClientSecretInstructions = '''You need OAuth tokens to run the script. To get them follow these steps:</p>
<ol>
<li>Go to <a href="https://www.reddit.com/prefs/apps/">Reddit app preferences</a> (while signed in to reddit)</li>
<li>Scroll down to the bottom and click "create app" (something like that)</li>
<li>Fill in the fields as such:</li>
<ul>
<li><b>name:</b> Content Collector</li>
<li>Choose <b>"script"</b> as the type</li>
<li><b>about url:</b> https://github.com/makuto/Liked-Saved-Image-Downloader</li>
<li><b>redirect uri:</b> http://localhost:8080</li>
</ul>
<li>Click create app</li>
<li>Copy the text which is right below "personal use script" for Client ID</li>
<li>Copy the secret for Client Secret as well</li>
</ol>
<p class="optionComment">Yes, this is painful, but it's for your own security
'''
# HTML help text attached to the 'Tumblr_Client_token_secret' option in
# settingsStructure. It uses the same unbalanced <p> layout as the Reddit
# instructions above.
# NOTE(review): the ")" after "more details" has no matching "(" - looks like
# a typo in the user-facing text.
tumblrClientSecretInstructions = '''
Follow the same procedure as reddit for Tumblr:</p>
<ol>
<li><a href="https://www.tumblr.com/oauth/apps">Register the app</a></li>
<li>Then go <a href="https://api.tumblr.com/console">here</a> to get your tokens</li>
</ol>
<p class="optionComment">Refer to the <a href="https://github.com/tumblr/pytumblr">PyTumblr page</a> for more details).
'''
# Help text shared by the 'Reddit_Total_requests' and 'Tumblr_Total_requests'
# options in settingsStructure.
requestsInstructions = 'Increase this value to get more submissions'
# Metadata used to lay the options out in nice sections in the web interface.
# Shape: [('Section header', [entry, ...]), ...] where each entry is either a
# bare option name or an ('option name', 'comment shown with the option')
# tuple. Every option name is a key of the settings dictionary.
settingsStructure = [
    ('Output',
     [
         ('Output_dir',
          'All images, videos, and comments will be saved to this directory.'
          ' You will have to restart the server whenever you change this value when using the Random Image Browser'),
         ('Metadata_output_dir',
          'Save JSON files with content metadata (source, author, URL, etc.) in this directory'),
     ]),

    ('Reddit Authentication',
     [
         'Reddit_Enabled',
         'Username',
         'Password',
         'Client_id',
         ('Client_secret', redditClientSecretInstructions),
     ]),

    ('Reddit Settings',
     [
         ('Reddit_Total_requests', requestsInstructions),
         'Reddit_Save_Liked',
         'Reddit_Save_Saved',
         'Reddit_Save_Comments',
         ('Reddit_Unlike_Liked', 'Unlike/remove upvote after the submission has been recorded'),
         ('Reddit_Unsave_Saved', 'Unsave submission after it has been recorded'),
         ('Reddit_Try_Request_Only_New',
          "Attempt to only request and download new submissions (those which haven't been downloaded). "
          "This uses the Reddit cache files to know what's already been downloaded, so it will only "
          "work if you've successfully run the script before"),
         ('Reddit_Save_Your_User_Posts',
          'Save posts with the same author as the current user. Disabling this will cause posts with'
          ' your username as author to be ignored.'),
     ]),

    ('Imgur Authentication',
     [
         'Imgur_Enabled',
         'Imgur_client_id',
         ('Imgur_client_secret',
          "These need to be filled in so that the script can download Imgur "
          "albums. If not filled in, imgur albums will be ignored. Single images will still be "
          "downloaded. If you want to use Imgur, sign in to Imgur, then go "
          "<a href=\"https://api.imgur.com/oauth2/addclient\">here</a> and create your new client."),
     ]),

    ('Gfycat Authentication',
     [
         'Gfycat_Client_id',
         ('Gfycat_Client_secret',
          "These need to be filled in so that the script can download Gfycat"
          " media. If not filled in, many Gfycat links will fail to download."
          " Go <a href=\"https://developers.gfycat.com/signup/#/apiform\">here</a> to get your API keys."),
     ]),

    ('Tumblr Authentication',
     [
         'Tumblr_Enabled',
         'Tumblr_Client_id',
         'Tumblr_Client_secret',
         'Tumblr_Client_token',
         ('Tumblr_Client_token_secret', tumblrClientSecretInstructions),
     ]),

    ('Tumblr Settings',
     [
         ('Tumblr_Total_requests', requestsInstructions),
         ('Tumblr_Try_Request_Only_New',
          "Attempt to only request and download new submissions (those which haven't been downloaded) "
          "This uses the Tumblr cache files to know what's already been downloaded, so it will only "
          "work if you've successfully run the script before"),
     ]),

    ('Pixiv Authentication',
     [
         'Pixiv_Enabled',
         'Pixiv_username',
         'Pixiv_password',
     ]),

    ('Pixiv Settings',
     [
         ('Pixiv_Try_Request_Only_New',
          "Attempt to only request and download new submissions (those which haven't been downloaded) "
          "This uses the Pixiv cache files to know what's already been downloaded, so it will only "
          "work if you've successfully run the script before"),
     ]),

    ('Pinterest Authentication',
     [
         'Pinterest_Enabled',
         'Pinterest_email',
         ('Pinterest_username', 'Look in the pinterest url while visiting your profile'),
         'Pinterest_password',
     ]),

    ('Pinterest Settings',
     [
         ('Pinterest_Try_Request_Only_New',
          "Attempt to only request and download new submissions (those which haven't been downloaded) "
          "This uses the Pinterest cache files to know what's already been downloaded, so it will only "
          "work if you've successfully run the script before"),
     ]),

    ('Download Settings',
     [
         'Should_download_albums',
         ('Only_download_albums',
          'If true, do not download single images, only submissions which are imgur albums'),
         ('Should_download_videos',
          'Use <a href="https://github.com/ytdl-org/youtube-dl/">Youtube-dl</a> '
          'to attempt to download videos'),
         ('Should_download_youtube_videos',
          'If <b>Should download videos</b>, whether or not to download YouTube videos'),
         ('Only_download_videos', 'Do not download any images, only supported videos'),
     ]),

    ("Server Settings",
     [
         ('Port',
          'The port number the server will listen on. Note that ports 80 (HTTP default) and 443 (HTTPS'
          ' default) require the server to be run as root. <b>You must restart the server for this change to take effect.</b>'),
         ('Launch_Browser_On_Startup',
          'Open default browser to localhost:port once the server has started'),
         ('Database', "Location of the database"),
         ('Database_Has_Imported_Unsupported_Submissions',
          'Setting this to false will cause a reimport of all <i>UnsupportedSubmissions</i> json files in both '
          '<b>Output dir</b> and <b>Metadata output dir</b>. This value is automatically set to True after '
          'a successful import.'),
         ('Database_Has_Imported_All_Submissions',
          'Setting this to false will cause a reimport of all <i>AllSubmissions</i> json files in both '
          '<b>Output dir</b> and <b>Metadata output dir</b>. This value is automatically set to True after '
          'a successful import.'),
         ('Database_Has_Imported_Comments',
          'Setting this to false will cause a reimport of all <i>Comments</i> json files in both '
          '<b>Output dir</b> and <b>Metadata output dir</b> This value is automatically set to True after '
          'a successful import.'),
     ]),

    ('Debugging and Development',
     [
         ('Only_important_messages', 'Output minimal information to the console'),
         ('Skip_n_percent_submissions',
          "If the script failed at say 70%, you could use toggle "
          "Use_cached_submissions and set this value to 69. The script would then restart 69% of"
          " the way into the cached submissions nearer to where you left off. The reason why this"
          " isn't default is because there might have been changes to the script which made"
          " previous submissions successfully download, so we always re-check submissions"),
         ('Use_cached_submissions', 'Do not get new stuff, just use the cache files from last run'),
         'Reddit_cache_file',
         'Tumblr_cache_file',
         'Pixiv_cache_file',
         'Reddit_Try_Request_Only_New_Saved_Cache_File',
         'Reddit_Try_Request_Only_New_Liked_Cache_File',
         'Tumblr_Try_Request_Only_New_Cache_File',
         'Pixiv_Try_Request_Only_New_Cache_File',
         'Pixiv_Try_Request_Only_New_Private_Cache_File',
         'Pinterest_Try_Request_Only_New_Cache_File',
         ('Should_soft_retrieve', "If True, don't actually download the images - just pretend to"),
     ]),
]
def valueAfterTag(line, optionTag):
    """Return the value part of an ``optionTag=value`` settings line.

    Everything after the first ``len(optionTag) + 1`` characters (the tag plus
    its one-character separator) is returned with surrounding spaces, tabs and
    newlines removed. The function does not check that ``line`` really starts
    with ``optionTag``; use lineHasOption() for that.
    """
    valueStart = len(optionTag) + 1
    rawValue = line[valueStart:]
    return rawValue.strip(' \t\n')
def lineHasOption(line, optionTag):
    """Tell whether ``line`` assigns a value to the option ``optionTag``.

    The line matches when it begins with the option name immediately followed
    by '=' (no whitespace in between). The name is compared
    case-insensitively.
    """
    loweredTag = optionTag.lower()
    # Cheap rejection first: the tag does not occur anywhere in the line
    if loweredTag not in line.lower():
        return False
    linePrefix = line[:len(optionTag) + 1]
    return linePrefix.lower() == loweredTag + '='
def getBooleanOption(line, optionTag):
    """Parse the boolean value that ``line`` assigns to ``optionTag``.

    Returns True only when the line sets the option and its value is 'true'
    (any letter case) or '1'. Any other value, or a line that does not set
    this option, yields False.
    """
    if not lineHasOption(line, optionTag):
        return False
    return valueAfterTag(line, optionTag).lower() in ('true', '1')
def getStringOption(line, optionTag):
    """Return the string value that ``line`` assigns to ``optionTag``.

    The value has surrounding whitespace removed. An empty string is returned
    when the line does not set this option.
    """
    if not lineHasOption(line, optionTag):
        return ''
    return valueAfterTag(line, optionTag)
def getIntegerOption(line, optionTag):
    """Return the integer value that ``line`` assigns to ``optionTag``.

    Returns -1 when the line does not set this option. A value that int()
    cannot convert raises ValueError.
    """
    if not lineHasOption(line, optionTag):
        return -1
    textValue = valueAfterTag(line, optionTag)
    return int(textValue)
def readSettings(settingsFileName):
    """Load option values from ``settingsFileName`` into the ``settings`` dict.

    The file holds one ``Option=value`` pair per line. Blank lines and lines
    whose first character is '#' are skipped. Option names are matched
    case-insensitively against the keys of ``settings``. The type of the
    current value (bool, int or str) selects how the text is parsed. Lines
    that match no known option are ignored, and options missing from the
    file keep their current value.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if an integer option has a value int() cannot parse.
    """
    # The context manager guarantees the handle is closed even when reading
    # fails part-way (for example on a decoding error).
    with open(settingsFileName, 'r') as settingsFile:
        lines = settingsFile.readlines()

    for line in lines:
        # Ignore blank or commented lines
        if not line.strip(' \t\n') or line[0] == '#':
            continue

        for option in settings:
            if not lineHasOption(line, option):
                continue

            # Exact type comparison on purpose: bool is a subclass of int, so
            # an isinstance() check would send booleans to the integer parser.
            currentType = type(settings[option])
            if currentType is bool:
                settings[option] = getBooleanOption(line, option)
            elif currentType is int:
                settings[option] = getIntegerOption(line, option)
            elif currentType is str:
                settings[option] = getStringOption(line, option)
            else:
                # Default of an unsupported type: leave it untouched and
                # keep scanning the remaining options.
                continue
            break
def hasRedditSettings():
    """Report whether Reddit is enabled and all its credentials are filled in.

    The result is meant to be used for its truthiness only: it is the first
    falsy setting encountered (checked in the order below), or the value of
    the last setting when every one of them is truthy.
    """
    for key in ('Reddit_Enabled', 'Username', 'Password', 'Client_id', 'Client_secret'):
        value = settings[key]
        if not value:
            return value
    return value
def hasTumblrSettings():
    """Report whether Tumblr is enabled and all its credentials are filled in.

    The result is meant to be used for its truthiness only: it is the first
    falsy setting encountered (checked in the order below), or the value of
    the last setting when every one of them is truthy.
    """
    requiredKeys = ('Tumblr_Enabled', 'Tumblr_Client_id', 'Tumblr_Client_secret',
                    'Tumblr_Client_token', 'Tumblr_Client_token_secret')
    for key in requiredKeys:
        value = settings[key]
        if not value:
            return value
    return value
def hasImgurSettings():
    """Report whether Imgur is enabled and its client id and secret are set.

    The result is meant to be used for its truthiness only: it is the first
    falsy setting encountered (checked in the order below), or the value of
    the last setting when every one of them is truthy.
    """
    for key in ('Imgur_Enabled', 'Imgur_client_id', 'Imgur_client_secret'):
        value = settings[key]
        if not value:
            return value
    return value
def hasPixivSettings():
    """Report whether Pixiv is enabled and its username and password are set.

    The result is meant to be used for its truthiness only: it is the first
    falsy setting encountered (checked in the order below), or the value of
    the last setting when every one of them is truthy.
    """
    for key in ('Pixiv_Enabled', 'Pixiv_username', 'Pixiv_password'):
        value = settings[key]
        if not value:
            return value
    return value
def hasPinterestSettings():
    """Report whether Pinterest is enabled and all its credentials are set.

    The result is meant to be used for its truthiness only: it is the first
    falsy setting encountered (checked in the order below), or the value of
    the last setting when every one of them is truthy.
    """
    requiredKeys = ('Pinterest_Enabled', 'Pinterest_username', 'Pinterest_password',
                    'Pinterest_email')
    for key in requiredKeys:
        value = settings[key]
        if not value:
            return value
    return value
# Personal settings file. Its LOCAL_ prefix is listed in .gitignore, which
# keeps a settings.txt full of credentials from being committed by accident.
hiddenSettingsFilename = 'LOCAL_settings.txt'

# Settings file written by the server. It is not meant to be edited by hand
# and definitely shouldn't be checked in.
serverSettingsFilename = "LOCAL_settings_from_server.txt"
def getSettingsFilename():
    """Return which settings file should be used.

    The candidates are hiddenSettingsFilename, serverSettingsFilename and
    DEFAULT_SETTINGS_FILENAME; those that exist are compared and the most
    recently edited one wins (the earliest candidate in that order wins a
    tie). When none exists, the current settings are written to
    serverSettingsFilename and that file is used.

    Returns:
        The chosen file name, or None when no settings file exists and one
        could not be created.
    """
    candidates = [fileName
                  for fileName in (hiddenSettingsFilename,
                                   serverSettingsFilename,
                                   DEFAULT_SETTINGS_FILENAME)
                  if os.path.isfile(fileName)]

    # No settings files at all; create one
    if not candidates:
        writeServerSettings()
        if not os.path.isfile(serverSettingsFilename):
            print("Error: can't seem to create settings file")
            return None
        return serverSettingsFilename

    # Choose the most recently edited file. The modification time is used
    # rather than getctime(): ctime is the creation time on Windows and the
    # metadata-change time on Unix, so neither reliably tracks edits.
    return max(candidates, key=os.path.getmtime)
def writeServerSettings():
    """Write every current setting to serverSettingsFilename.

    One ``Option=value`` line is written per entry of ``settings``, in
    dictionary order, which is the format readSettings() parses. Any existing
    file is overwritten.

    Raises:
        OSError: if the file cannot be opened for writing.
    """
    # str.format() renders booleans as 'True' / 'False', which is the
    # spelling getBooleanOption() accepts when the file is read back.
    settingsOutput = ['{}={}\n'.format(option, optionValue)
                      for option, optionValue in settings.items()]

    # The context manager closes (and flushes) the file even if the write
    # itself raises.
    with open(serverSettingsFilename, 'w') as serverSettings:
        serverSettings.writelines(settingsOutput)

    print('Wrote settings to ' + serverSettingsFilename)
def getSettings():
    """Locate the settings file to use and load it into ``settings``.

    The file is chosen by getSettingsFilename() and parsed by readSettings().
    When no settings file exists and none could be created,
    getSettingsFilename() returns None. The built-in defaults are then left
    in place; previously this case failed with a TypeError while building
    the log message.
    """
    settingsFilename = getSettingsFilename()
    if settingsFilename is None:
        print('No settings file is available; keeping the built-in default settings')
        return

    print('Reading settings from settings file with most recent timestamp, which was:\n'
          + settingsFilename
          + "\nIf you want to read from a different settings file, make it more recent")
    readSettings(settingsFilename)