ig-pixelfed-mirror/igmirror.py

#!/usr/bin/python3
import requests
import hashlib
import string
import random
import subprocess
import time
import json
import os
import re

# create a local mirror account for the given Instagram username
def add_igaccount(acc_id):
    accfile = './db/accounts/{}'.format(acc_id)
    if not os.path.exists('./scripts/user_create'):
        print('E| You may need to initialize the server environment first')
        return 1
    if not os.path.exists(accfile):
        data = getig_user_data(acc_id)
        name = data['graphql']['user']['full_name']
        name = re.sub(r'[^a-zA-Z0-9_\s]', '', name)
        account = {
            'name': name,
            'username': acc_id,
            'password': random_string()
        }
        # pass the arguments as a list so the shell cannot interpret them
        subprocess.run(['./scripts/user_create', account['name'],
                        account['username'], account['password']])
        with open(accfile, 'w') as w:
            w.write(json.dumps(account))
    else:
        print('W| User "{}" already exists in local database'.format(acc_id))
    return 0

def random_string(count=32):
    return ''.join(random.choices(string.ascii_letters + string.digits, k=count))

# get all profile data from a user:
# - display name
# - bio description
# - shared posts (images/videos)
# - much more info...
# (a usage sketch follows this function)
def getig_user_data(acc_id):
    return json.loads(
        instagram_get('/{}/?__a=1'.format(acc_id), 120000)
    )
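
# Usage sketch (not part of the original script): pull a couple of fields out
# of the profile JSON. 'full_name' is the key already used in add_igaccount();
# 'biography' is an assumption about the same Instagram web payload.
def example_show_profile(acc_id):
    data = getig_user_data(acc_id)
    user = data['graphql']['user']
    print('{}: {}'.format(user['full_name'], user.get('biography', '')))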

# runs a basic GET request emulating Tor Browser, with a simple file cache
def instagram_get(url, CACHE_SECS=600):
    headers = get_random_headers()
    default_headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache',
        'Host': 'www.instagram.com',
        'Referer': 'https://www.instagram.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0'
    }
    # fill in any headers the random header file did not provide
    for key, value in default_headers.items():
        headers.setdefault(key, value)
    url = 'https://www.instagram.com{}'.format(url)
    # cache file layout: first line is the expiry timestamp, the rest is the body
    cachef = './cache/' + hashlib.md5(url.encode()).hexdigest()
    now = int(time.time())
    if os.path.exists(cachef):
        cache = readf(cachef).splitlines()
        ctime = int(cache[0])
        if now < ctime:
            return '\n'.join(cache[1:])
    r = requests.get(url, headers=headers)
    resp = r.text
    with open(cachef, 'w') as w:
        w.write(str(now + CACHE_SECS) + '\n')
        w.write(resp)
    return resp
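
# Usage sketch (not part of the original script): instagram_get() assumes the
# ./cache directory already exists, so a caller might prepare it first.
# CACHE_SECS=3600 keeps the fetched profile page for one hour.
def example_fetch_profile_page(acc_id):
    os.makedirs('./cache', exist_ok=True)
    return instagram_get('/{}/'.format(acc_id), CACHE_SECS=3600)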

# pick one of the saved header files at random and parse its 'Name: value' lines
def get_random_headers():
    files = os.listdir('./headers')
    lines = readf('./headers/{}'.format(random.choice(files))).splitlines()
    headers = {}
    for line in lines:
        reg = re.search('(^[^:]+):(.*)', line)
        if reg:  # skip blank or malformed lines instead of crashing
            headers[reg.group(1).strip()] = reg.group(2).strip()
    return headers
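
# Sketch (not part of the original script): get_random_headers() expects plain
# 'Name: value' lines in files under ./headers. This helper writes one such
# file; the header names and values here are only illustrative.
def example_write_headers_file(path='./headers/firefox'):
    os.makedirs('./headers', exist_ok=True)
    with open(path, 'w') as w:
        w.write('User-Agent: Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0\n')
        w.write('Accept-Language: en-US,en;q=0.5\n')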

def readf(f):
    with open(f, 'r') as r:
        return r.read().strip()

if __name__ == '__main__':
    main()