From 3efba57cf3c31787f358178c927cd02a27332ea8 Mon Sep 17 00:00:00 2001 From: Untone Date: Mon, 4 Oct 2021 20:06:05 +0300 Subject: [PATCH] migration api upgraded --- .gitignore | 2 +- migrate.py | 68 +++++++++++++------- migration/.DS_Store | Bin 0 -> 6148 bytes migration/content/.DS_Store | Bin 0 -> 6148 bytes migration/content/article/.gitkeep | 0 migration/content/image/.gitkeep | 0 migration/content/music/.gitkeep | 0 migration/content/prose/.gitkeep | 0 migration/content/video/.gitkeep | 0 migration/tables/__init__.py | 2 +- migration/tables/content_item_categories.py | 2 +- migration/tables/content_items.py | 46 +++++++------ migration/tables/tags.py | 4 +- 13 files changed, 77 insertions(+), 47 deletions(-) create mode 100644 migration/.DS_Store create mode 100644 migration/content/.DS_Store create mode 100644 migration/content/article/.gitkeep create mode 100644 migration/content/image/.gitkeep create mode 100644 migration/content/music/.gitkeep create mode 100644 migration/content/prose/.gitkeep create mode 100644 migration/content/video/.gitkeep diff --git a/.gitignore b/.gitignore index 6655941d..a97e33e8 100644 --- a/.gitignore +++ b/.gitignore @@ -135,4 +135,4 @@ discours.crt Pipfile.lock migration/data -migration/content \ No newline at end of file +migration/content/**/*.md \ No newline at end of file diff --git a/migrate.py b/migrate.py index 92354091..a8ce0e75 100644 --- a/migrate.py +++ b/migrate.py @@ -3,16 +3,27 @@ from migration.tables.users import migrate as migrateUser from migration.tables.content_items import migrate as migrateShout from migration.tables.content_item_categories import migrate as migrateTopic from migration.utils import DateTimeEncoder +from orm import Community def users(limit): print('migrating users...') data = json.loads(open('migration/data/users.json').read()) newdata = {} + exportData = {} counter = 0 + # limit = 100 #try: for entry in data: oid = entry['_id'] - newdata[oid] = migrateUser(entry) + user = migrateUser(entry) + newdata[oid] = user + del user['password'] + del user['notifications'] + # del user['oauth'] + del user['emailConfirmed'] + del user['username'] + del user['email'] + exportData[user['slug']] = user counter += 1 if counter > limit: break @@ -20,25 +31,27 @@ def users(limit): # print(str(counter) + '/' + str(len(data)) + ' users entries were migrated') # print('try to remove database first') open('migration/data/users.dict.json','w').write( json.dumps(newdata, cls=DateTimeEncoder) ) + open('../src/data/authors.json','w').write( json.dumps(exportData, cls=DateTimeEncoder) ) print(str(counter) + ' users entries were migrated') -def topics(limit): +def topics(): print('migrating topics...') data = json.loads(open('migration/data/content_item_categories.json').read()) newdata = {} + exportData = {} counter = 0 try: for entry in data: oid = entry['_id'] newdata[oid] = migrateTopic(entry) + exportData[entry['slug']] = newdata[oid] counter += 1 - if counter > limit: - break except Exception: print(str(counter) + '/' + str(len(data)) + ' topics were migrated') print('try to remove database first') open('migration/data/topics.dict.json','w').write( json.dumps(newdata, cls=DateTimeEncoder) ) + open('../src/data/topics.json','w').write( json.dumps(exportData, cls=DateTimeEncoder) ) print(str(counter) + ' topics were migrated') def shouts(limit): @@ -47,20 +60,23 @@ def shouts(limit): discoursAuthor = 0 data = json.loads(open('migration/data/content_items.json').read()) newdata = {} - print(str(len(data)) + ' entries was loaded. now migrating...') + print(str(len(data)) + ' entries loaded. now migrating...') errored = [] - + exportData = {} for entry in data: try: oid = entry['_id'] - newdata[oid] = migrateShout(entry) - counter += 1 - + shout = migrateShout(entry) + newdata[oid] = shout author = newdata[oid]['authors'][0]['slug'] - if author == 'discours': - discoursAuthor += 1 line = str(counter) + ': ' + newdata[oid]['slug'] + " @" + str(author) - print(line) + if shout['layout'] == 'article': + counter += 1 + exportData[shout['slug']] = shout + print(line) + # counter += 1 + if author == 'discours.io': + discoursAuthor += 1 open('./shouts.id.log','a').write(line + '\n') if counter > limit: break @@ -70,29 +86,33 @@ def shouts(limit): raise Exception open('migration/data/shouts.dict.json','w').write( json.dumps(newdata, cls=DateTimeEncoder) ) + open('../src/data/articles.json','w').write( json.dumps(exportData, cls=DateTimeEncoder) ) print(str(counter) + ' shouts were migrated') - print(str(discoursAuthor) + ' from them by @discours') + print(str(discoursAuthor) + ' from them by @discours.io') print(str(len(errored)) + ' shouts without authors') if __name__ == '__main__': import sys if len(sys.argv) > 1: - limit = int(sys.argv[2]) if sys.argv[1] == "users": - users(limit) + users(668) elif sys.argv[1] == "topics": - topics(limit) + topics() elif sys.argv[1] == "shouts": - shouts(limit) - elif sys.argv[1] == "comments": - comments(limit) - pass + Community.create(**{ + 'slug': 'discours.io', + 'name': 'Дискурс', + 'pic': 'https://discours.io/images/logo-min.svg', + 'createdBy': '0', + 'createdAt': ts + }) + shouts(3626) elif sys.argv[1] == "all": - topics(limit) - users(limit) - shouts(limit) + topics() + users(668) + shouts(3626) elif sys.argv[1] == "bson": import migration.bson2json bson2json.json_tables() else: - print('usage: python migrate.py ') \ No newline at end of file + print('usage: python migrate.py ') \ No newline at end of file diff --git a/migration/.DS_Store b/migration/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..7dc3eb8a1b22eed8096a248b3e13387e0d4d142b GIT binary patch literal 6148 zcmeHKy-ve05VqSw6Cp$gBszLw>(C!WC#0y9fek4L20(2>A{wd0s7ou|0+RrlE+SzR!9QSV@9@D4n`9*Wf;cuv9%is)N!T7Sw zdv_EMWqbpXS=B6?kQg8ah=CPnz#Mzl#)_|#mPZT_1HWeg&j$gD=orj3s-puM{CvcC z2@wTsd`ln-gO0&mBSb*BE(O%3+&nS3E(gCbd5*zcqb_G$%?#t1nYnqPa5X#lg-U1K z(MT;ZKnyH0P}JQD-v38GzyBADs7DMC11rSVpoSRuQwBZ&g62~9 literal 0 HcmV?d00001 diff --git a/migration/content/.DS_Store b/migration/content/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5b679579ced7ed07fa64a163f8ed9922cfa15c7f GIT binary patch literal 6148 zcmeHKyG{c^3>-s>APQ1a?k~U>tfEj*^8uhhftym4H69+OZMz~ zJ-4|j&M^R6zCGRm3jlMvBi=ns&ClH@c2*f9()o-X20WqR_I{XDpH4XU0vo*G70-wK z{cbz#w>Y|G%UEF#jKuw37l-;9n_Vv(>|D$yci0I(a$owT-@}d(8*kjq9K= mL^~!%JLbmQ@pTksUGp{1d*PTEbmoIj)Sm&@MJ5IQT7fSq6&zFm literal 0 HcmV?d00001 diff --git a/migration/content/article/.gitkeep b/migration/content/article/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/migration/content/image/.gitkeep b/migration/content/image/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/migration/content/music/.gitkeep b/migration/content/music/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/migration/content/prose/.gitkeep b/migration/content/prose/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/migration/content/video/.gitkeep b/migration/content/video/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/migration/tables/__init__.py b/migration/tables/__init__.py index 35971e44..6cc37870 100644 --- a/migration/tables/__init__.py +++ b/migration/tables/__init__.py @@ -1 +1 @@ -__all__ = ["users"] \ No newline at end of file +__all__ = ["users", "tags", "content_items", "comments"], \ No newline at end of file diff --git a/migration/tables/content_item_categories.py b/migration/tables/content_item_categories.py index ffb1db5a..2271005a 100644 --- a/migration/tables/content_item_categories.py +++ b/migration/tables/content_item_categories.py @@ -13,7 +13,7 @@ def migrate(entry): 'slug': entry['slug'], 'createdBy': entry['createdBy'], # NOTE: uses an old user id 'createdAt': entry['createdAt'], - 'value': entry['title'].lower(), + 'title': entry['title'].lower(), 'parents': [], 'children': [], 'old_id': entry['_id'] diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py index 68416b4d..d878b80d 100644 --- a/migration/tables/content_items.py +++ b/migration/tables/content_items.py @@ -14,7 +14,7 @@ from orm.base import local_session users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read()) users_dict['0'] = { 'id': 9999999, - 'slug': 'discours', + 'slug': 'discours.io', 'name': 'Дискурс', 'userpic': 'https://discours.io/images/logo-mini.svg' } @@ -56,6 +56,7 @@ def migrate(entry, limit=3626, start=0): r = { 'layout': type2layout[entry['type']], 'title': entry['title'], + 'community': 0, 'authors': [], 'topics': [], 'published': entry.get('published', False), @@ -87,10 +88,9 @@ def migrate(entry, limit=3626, start=0): if body_orig == '': print('EMPTY BODY!') else: - # body_html = str(BeautifulSoup(body_orig, features="html.parser")) - # body = html2text(body_orig).replace('****', '**') + body_html = str(BeautifulSoup(body_orig, features="html.parser")) + r['body'] = html2text(body_html).replace('****', '**') r['old_id'] = entry.get('_id') - r['body'] = body_orig else: print(r['slug'] + ': literature has no media') elif entry.get('type') == 'Video': @@ -109,8 +109,8 @@ def migrate(entry, limit=3626, start=0): if r.get('body') is None: body_orig = entry.get('body', '') - body_html = BeautifulSoup(body_orig, features="html.parser") - r['body'] = body_html # html2text(body_orig).replace('****', '**') + body_html = str(BeautifulSoup(body_orig, features="html.parser")) + r['body'] = html2text(body_html).replace('****', '**') r['old_id'] = entry.get('_id') body = r.get('body') @@ -141,12 +141,12 @@ def migrate(entry, limit=3626, start=0): if user is None: user = session.query(User).filter(User.slug == authordata['slug']).first() - slug = user.slug - name = user.name + slug = user['slug'] + name = user['name'] userpic = user.userpic else: # no application, no author! - slug = 'discours' + slug = 'discours.io' name = 'Дискурс' userpic = 'https://discours.io/images/logo-mini.svg' with local_session() as session: @@ -160,19 +160,22 @@ def migrate(entry, limit=3626, start=0): metadata = {} metadata['title'] = r.get('title') metadata['authors'] = r.get('authors') + metadata['createdAt'] = entry.get('createdAt', ts) + metadata['layout'] = type2layout[entry['type']] if r.get('cover', False): metadata['cover'] = r.get('cover') post = frontmatter.Post(body, **metadata) dumped = frontmatter.dumps(post) if entry['published']: - if r.get('old_id', None): - ext = 'html' - content = str(body).replace('

', '').replace('

', '') - else: - ext = 'md' - content = dumped - open('migration/content/' + entry['type'].lower() + '/' + r['slug'] + '.' + ext, 'w').write(content) + #if r.get('old_id', None): + # ext = 'html' + # content = str(body).replace('

', '').replace('

', '') + #else: + ext = 'md' + content = dumped + open('migration/content/' + metadata['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content) + try: shout_dict = r.copy() @@ -186,8 +189,15 @@ def migrate(entry, limit=3626, start=0): shout_dict['publishedAt'] = parse(entry.get('publishedAt')) else: shout_dict['publishedAt'] = ts - s = Shout.create(**shout_dict) - r['id'] = s.id + del shout_dict['published'] + del shout_dict['views'] # FIXME + del shout_dict['rating'] # FIXME + del shout_dict['ratings'] + try: + s = Shout.create(**shout_dict) + r['id'] = s.id + except: + pass except: r['body'] = 'body moved' print(r) diff --git a/migration/tables/tags.py b/migration/tables/tags.py index 5ef92c53..6e6d80f0 100644 --- a/migration/tables/tags.py +++ b/migration/tables/tags.py @@ -9,12 +9,12 @@ def migrate(entry): children: [String] # and children } ``` - creator = get_new_user_id(entry['cratedBy']) + creator = get_new_user_id(entry['createdBy']) return { 'slug': entry['slug'], 'createdBy': creator_id, # NOTE: uses an old user id 'createdAt': entry['createdAt'], - 'value': entry['value'].lower(), + 'title': entry['value'].lower(), 'parents': [], 'children': [] } \ No newline at end of file