initial commit

2021-02-06 21:12:19 +01:00 · 2021-02-06 21:12:19 +01:00 · 02eff5188a
commit 02eff5188a
3 changed files with 82 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+cache/*
+headers/*
--- a/headers.example
+++ b/headers.example
@ -0,0 +1,3 @@
+X-IG-App-ID: 0000000000000000
+X-IG-WWW-Claim: hmac.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Cookie: ig_cb=2; ig_did=0CCCC-CCCCCC-CCCCCCC; csrftoken=abc45644cc4cc4c4c4c4cc4; mid=YaaaaErt45xx454A_ee4aeExE4; rur=FRC; ds_user_id=4566544444; sessionid=4566544444%5555555555%5654
--- a/mirror.py
+++ b/mirror.py
@ -0,0 +1,77 @@
+#!/usr/bin/python3
+import requests
+import hashlib
+import random
+import time
+import json
+import os
+import re
+
+def main():
+    """Demo entry point: fetch one hard-coded profile and print it as JSON."""
+    profile = getig_user_data('shakira')
+    pretty = json.dumps(profile, indent=4)
+    print(pretty)
+
+def getig_user_data(user_id):
+    """Return the decoded JSON profile document of one user.
+
+    Requests '/<user_id>/?__a=1' through instagram_get with a cache
+    lifetime of 120000 seconds and parses the body with json.loads.
+
+    The document is meant to carry all profile data of the user:
+      - display name
+      - bio description
+      - shared posts (images/videos)
+      - much more info...
+    """
+    path = '/{}/?__a=1'.format(user_id)
+    body = instagram_get(path, 120000)
+    return json.loads(body)
+
+# runs a basic GET request emulating Tor Browser
+def instagram_get(url, CACHE_SECS=600, timeout=30):
+    """GET a path from www.instagram.com through a small on-disk cache.
+
+    Each response is stored in ./cache/<md5 of the full URL>. The first
+    line of a cache file is its expiry time (epoch seconds), everything
+    after it is the response body.
+
+    Args:
+        url: path (and query string) appended to
+            'https://www.instagram.com'.
+        CACHE_SECS: how long, in seconds, a freshly fetched response
+            stays valid.
+        timeout: seconds handed to requests.get so a stalled connection
+            cannot block forever.
+
+    Returns:
+        The response body as text, either from the cache or the network.
+    """
+    url = 'https://www.instagram.com{}'.format(url)
+    cachef = './cache/' + hashlib.md5(url.encode()).hexdigest()
+    now = int(time.time())
+
+    if os.path.exists(cachef):
+        cache = readf(cachef).splitlines()
+        try:
+            ctime = int(cache[0])
+        except (IndexError, ValueError):
+            # empty or corrupt cache file: treat it as expired and refetch
+            ctime = 0
+        if now < ctime:
+            return '\n'.join(cache[1:])
+
+    # Headers are only needed on a cache miss, so build them here.
+    headers = get_random_headers()
+    # NOTE(review): 'br' is advertised to match the browser; requests only
+    # decodes Brotli bodies when a brotli package is installed - confirm.
+    default_headers = {
+        'Accept': '*/*',
+        'Accept-Language': 'en-US,en;q=0.5',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Connection': 'keep-alive',
+        'Cache-Control': 'no-cache',
+        'Pragma': 'no-cache',
+        'Host': 'www.instagram.com',
+        'Referer': 'https://www.instagram.com/',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0'
+    }
+    # Values from the headers file win; defaults only fill the gaps.
+    for key, value in default_headers.items():
+        headers.setdefault(key, value)
+
+    r = requests.get(url, headers=headers, timeout=timeout)
+    resp = r.text
+
+    # NOTE(review): the body is cached whatever the HTTP status was -
+    # confirm that caching error pages for CACHE_SECS is intended.
+    # ./cache is git-ignored, so it may not exist yet on a fresh checkout.
+    os.makedirs(os.path.dirname(cachef), exist_ok=True)
+    with open(cachef, 'w') as w:
+        w.write(str(now + CACHE_SECS) + '\n')
+        w.write(resp)
+    return resp
+
+def get_random_headers():
+    """Load HTTP headers from one randomly chosen file in ./headers.
+
+    Every file is read as one 'Name: value' header per line. Lines that
+    have no ':' separator or an empty name (e.g. blank lines) are skipped
+    instead of aborting the whole request.
+
+    Returns:
+        dict mapping header name to header value, both stripped of
+        surrounding whitespace; an empty dict when ./headers contains no
+        entries.
+
+    Raises:
+        OSError: when ./headers or the chosen file cannot be read.
+    """
+    names = os.listdir('./headers')
+    if not names:
+        return {}
+    lines = readf('./headers/{}'.format(random.choice(names))).splitlines()
+    headers = {}
+    for line in lines:
+        # split on the first ':' only - header values may contain colons
+        name, sep, value = line.partition(':')
+        name = name.strip()
+        if not sep or not name:
+            continue
+        headers[name] = value.strip()
+    return headers
+
+def readf(f):
+    """Return the text content of file `f` with surrounding whitespace stripped.
+
+    Args:
+        f: path of the file to read (opened in text mode).
+
+    Returns:
+        The whole file as one string, stripped at both ends.
+    """
+    # 'with' closes the handle even when read() raises
+    with open(f, 'r') as r:
+        return r.read().strip()
+
+# run the demo only when executed as a script, not when imported
+if __name__ == '__main__':
+    main()