def update_igaccount(acc_id):
    """Mirror the latest Instagram posts of an already-registered account.

    The Instagram profile is fetched synchronously; downloading and
    re-posting the media is handed to a background thread because it might
    take long.

    Args:
        acc_id: Instagram username; must already have a file under
            ``./db/accounts/``.

    Returns:
        1 when the account has not been created locally, 2 when no profile
        data could be fetched, and None once the worker thread was started.
    """
    # if account does not exist, we stop the mirroring process
    accfile = './db/accounts/{}'.format(acc_id)
    if not os.path.exists(accfile):
        print('E| User "'+acc_id+'" has not been created yet, maybe you wanted to call //add ?')
        return 1

    data = getig_user_data(acc_id)

    # Same guard (and same message/return code) as add_igaccount: an empty
    # result means the user probably does not exist or the fetch failed.
    # Without it the lookup below raises KeyError.
    if not data:
        print('E| User "{}" does not exist on Instagram'.format(acc_id))
        return 2

    # do it on a thread because it might take long to download the latest posts
    threading.Thread(target=pixelfed_dlposts, args=(acc_id, data['graphql']['user'])).start()


def _timeline_nodes(data):
    """Return the post nodes from both profile timelines, oldest first."""
    nodes = []
    for key in ('edge_owner_to_timeline_media', 'edge_felix_video_timeline'):
        # tolerate a timeline that is missing or null in the profile data
        edges = (data.get(key) or {}).get('edges') or []
        nodes.extend(edge['node'] for edge in edges)
    # sorted() is stable, so on equal timestamps regular media stays ahead
    # of felix videos and no node is dropped or picked twice.
    return sorted(nodes, key=lambda node: node['taken_at_timestamp'])


def _shorten(text, limit=140, keep=136):
    """Cut text to `keep` chars plus '...' when longer than `limit`; pass None through."""
    if text is None:
        return None
    return text[:keep] + '...' if len(text) > limit else text


def pixelfed_dlposts(acc_id, data):
    """Mirror the not-yet-imported image posts of an Instagram profile.

    Only the posts already present in `data` are considered (no further
    pages are loaded). Videos are skipped; for now only images are
    supported. Posts are processed oldest first.

    Args:
        acc_id: Instagram username; also the key of the 'accounts' and
            'posts' database entries.
        data: the profile's user object (callers pass
            ``data['graphql']['user']``), holding the timeline edges.

    Side effects:
        Uploads and publishes media on the configured instance, prints
        progress, and stores the updated list of mirrored shortcodes with
        ``db_set('posts', ...)``.
    """
    items = _timeline_nodes(data)

    # mirror posts from the account (only the last N, without loading more),
    # but only the ones that has not already been imported
    accposts = db_get('posts', acc_id)
    accdata = db_get('accounts', acc_id)
    try:
        for item in items:
            # for now, we only support images (not videos :( )
            if item['is_video']:
                continue

            shortcode = item['shortcode']
            if shortcode in accposts:
                print('I| skipping IG post {}:{}. Already added'.format(acc_id, shortcode))
                continue

            print('I| processing IG post {}:{}'.format(acc_id, shortcode))
            ig_url = 'https://www.instagram.com/p/{}/'.format(shortcode)
            caption_edges = item['edge_media_to_caption']['edges']
            caption = caption_edges[0]['node']['text'] if caption_edges else ''
            # may be absent or None; _shorten() copes with both
            altcaption = item.get('accessibility_caption')

            _token, jsdata = pixelfed_postimage(acc_id, item['display_url'], accdata)
            if not jsdata:
                print('E| Could not upload media for {}:{}'.format(acc_id, shortcode))
                continue

            jsdata['description'] = ig_url
            jsdata['alt'] = _shorten(altcaption)
            jsdata['cw'] = False
            print(jsdata)
            r = requests.post(
                'https://'+config()['instance']+'/api/compose/v0/publish',
                json={
                    "media": [jsdata], "caption": _shorten(caption),
                    "visibility": "public", "cw": False,
                    "comments_disabled": False, "place": False,
                    "tagged": [], "optimize_media": True,
                },
                cookies=accdata['cookie'],
                headers={
                    'Content-Type': 'application/json',
                    'X-CSRF-TOKEN': _token,
                    'X-Requested-With': 'XMLHttpRequest',
                    'X-XSRF-TOKEN': accdata['cookie']['XSRF-TOKEN'],
                },
            )
            if r.ok:
                # only remember the post once it was really published, so a
                # failed publish is retried on the next update
                accposts.append(shortcode)
                print('I| uploaded media for {}:{} : {}'.format(acc_id, shortcode, r.status_code))
            else:
                print('E| Could not publish media for {}:{} : {}'.format(acc_id, shortcode, r.status_code))

        print('I| done uploading media for {}'.format(acc_id))
    finally:
        # persist progress even if a request raised half-way through, so
        # already published posts are not uploaded again next time
        db_set('posts', acc_id, accposts)


# upload media and return data
def pixelfed_postimage(acc_id, image_url, accdata=None):
    """Upload one image to the instance's compose media endpoint.

    Args:
        acc_id: account name, used to load `accdata` when it is not given.
        image_url: URL of the image; it is fetched through the local cache.
        accdata: account record holding the session under 'cookie';
            loaded with ``db_get('accounts', acc_id)`` when None.

    Returns:
        ``(_token, parsed_json)`` when the upload answers HTTP 200,
        otherwise ``(None, False)``.
    """
    if accdata is None:
        accdata = db_get('accounts', acc_id)
    cachef = pixelfed_cacheimg(image_url)
    _, _token = pixelfed_token_url('', accdata['cookie'])
    # the context manager closes the cached file once the request is done
    with open(cachef, 'rb') as image:
        r = requests.post(
            'https://'+config()['instance']+'/api/compose/v0/media/upload',
            files={'file': image}, cookies=accdata['cookie'],
            headers={
                'X-CSRF-TOKEN': _token,
                'X-Requested-With': 'XMLHttpRequest',
                'X-XSRF-TOKEN': accdata['cookie']['XSRF-TOKEN'],
            },
        )
    if r.status_code == 200:
        return _token, json.loads(r.text)
    return None, False


# get the image by URL but cache it forever, as if the profile changes the pic
# the url will be different, and therefore, the sum will also be different
def pixelfed_cacheimg(image_url):
    """Download `image_url` into ``./cache`` (once) and return the local path.

    The cache file name is derived from ``md5sum(image_url)``; an existing
    file is reused without contacting the network.
    """
    cachef = './cache/{}.jpg'.format(md5sum(image_url))
    if not os.path.exists(cachef):
        r = requests.get(image_url)
        # NOTE(review): the HTTP status is not checked, so an error response
        # body would be cached as if it were the image; confirm whether
        # callers should see a failure here instead.
        with open(cachef, 'wb') as w:
            w.write(r.content)
    return cachef