From fe28c3918cea28e5bd441b2e0ddd5d0b3f8a355a Mon Sep 17 00:00:00 2001
From: Untone
Date: Fri, 8 Oct 2021 12:58:19 +0300
Subject: [PATCH] upgrade

---
 migrate.py                        | 41 ++++++++++++--------
 migration/bson2json.py            |  8 ++--
 migration/tables/content_items.py | 64 +++++++++++++++++--------------
 3 files changed, 66 insertions(+), 47 deletions(-)

diff --git a/migrate.py b/migrate.py
index 5ed76323..9e6c4145 100644
--- a/migrate.py
+++ b/migrate.py
@@ -2,8 +2,9 @@
 import json
 import base64
 import re
+import frontmatter
 from migration.tables.users import migrate as migrateUser
-from migration.tables.content_items import migrate as migrateShout
+from migration.tables.content_items import get_metadata, migrate as migrateShout
 from migration.tables.content_item_categories import migrate as migrateCategory
 from migration.tables.tags import migrate as migrateTag
 from migration.utils import DateTimeEncoder
@@ -70,16 +71,19 @@ def topics():
     print('migrating topics...')
     cat_data = json.loads(
         open('migration/data/content_item_categories.json').read())
-    tag_data = json.loads(open('migration/data/tags.json').read())
-    newdata = {}
+    # tag_data = json.loads(open('migration/data/tags.json').read())
+    new_data = {}
+    old_data = {}
     counter = 0
     try:
         for cat in cat_data:
             topic = migrateCategory(cat)
-            newdata[topic['slug']] = topic
+            old_data[topic['old_id']] = topic
+            new_data[topic['slug']] = topic
             counter += 1
     except Exception:
         print('cats exception, try to remove database first')
+    '''
     try:
         for tag in tag_data:
             topic = migrateTag(tag)
@@ -88,14 +92,19 @@
     except Exception:
         print('tags exception, try to remove database first')
         raise Exception
-    export_list = sorted(newdata.items(), key=lambda item: str(
-        item[1]['createdAt']))[-10:]
+    '''
+    export_list = sorted(new_data.items(), key=lambda item: str(
+        item[1]['createdAt']))
     open('migration/data/topics.dict.json',
-         'w').write(json.dumps(newdata, cls=DateTimeEncoder))
+         'w').write(json.dumps(old_data, cls=DateTimeEncoder))
     open('../src/data/topics.json', 'w').write(json.dumps(dict(export_list),
-        cls=DateTimeEncoder, indent=4, sort_keys=True, ensure_ascii=False))
+        cls=DateTimeEncoder,
+        indent=4,
+        sort_keys=True,
+        ensure_ascii=False))
     print(str(counter) + ' from ' + str(len(cat_data)) +
-          ' tags and ' + str(len(tag_data)) + ' cats were migrated')
+          #' tags and ' + str(len(tag_data)) +
+          ' cats were migrated')
     print(str(len(export_list)) + ' topics were exported')
@@ -110,7 +119,7 @@ def shouts():
     errored = []
     for entry in content_data:
         try:
-            (shout, content) = migrateShout(entry)
+            shout = migrateShout(entry)
             newdata[shout['slug']] = shout
             author = newdata[shout['slug']]['authors'][0]['slug']
             line = str(counter+1) + ': ' + shout['slug'] + " @" + str(author)
@@ -127,12 +136,14 @@
         limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data)
     except ValueError:
         limit = len(content_data)
-    export_list = sorted(newdata.items(
-    ), key=lambda item: item[1]['createdAt'] if item[1]['layout'] == 'article' else OLD_DATE)[:limit]
+    export_list = [i for i in newdata.items() if i[1]['layout'] == 'article' and i[1]['published']]
+    export_list = sorted(export_list, key=lambda item: item[1]['createdAt'] or OLD_DATE, reverse=True)[:limit]
     export_clean = {}
-    for slug, a in dict(export_list).items():
-        export_clean[slug] = extract_images(a)
-        open('../content/discours.io/'+slug+'.md', 'w').write(content)
+    for (slug, a) in export_list:
+        export_clean[a['slug']] = extract_images(a)
+        metadata = get_metadata(a)
+        content = frontmatter.dumps(frontmatter.Post(a['body'], **metadata))
+        open('../content/discours.io/'+a['slug']+'.md', 'w').write(content)
     open('migration/data/shouts.dict.json',
          'w').write(json.dumps(newdata, cls=DateTimeEncoder))
     open('../src/data/articles.json', 'w').write(json.dumps(dict(export_clean),
diff --git a/migration/bson2json.py b/migration/bson2json.py
index 27f1b327..1f2f94ed 100644
--- a/migration/bson2json.py
+++ b/migration/bson2json.py
@@ -3,10 +3,10 @@
 import datetime
 import json
 import importlib
-import DateTimeEncoder from utils
+from migration.utils import DateTimeEncoder

 def json_tables():
-    print('creating json files at data/')
+    print('creating json files at migration/data/')
     data = {
         "content_items": [],
         "content_item_categories": [],
@@ -17,13 +17,13 @@
     }
     for table in data.keys():
         lc = []
-        with open('data/'+table+'.bson', 'rb') as f:
+        with open('migration/data/'+table+'.bson', 'rb') as f:
             bs = f.read()
             base = 0
             while base < len(bs):
                 base, d = bson.decode_document(bs, base)
                 lc.append(d)
         data[table] = lc
-        open('data/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))
+        open('migration/data/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))
     return data
diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py
index 7a1d16e4..c9d8cb36 100644
--- a/migration/tables/content_items.py
+++ b/migration/tables/content_items.py
@@ -12,6 +12,7 @@ from sqlalchemy.exc import IntegrityError
 from orm.base import local_session

 users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
+topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
 users_dict['0'] = {
     'id': 9999999,
     'slug': 'discours.io',
@@ -31,6 +32,16 @@ type2layout = {
 }


+def get_metadata(r):
+    metadata = {}
+    metadata['title'] = r.get('title')
+    metadata['authors'] = r.get('authors')
+    metadata['createdAt'] = r.get('createdAt', ts)
+    metadata['layout'] = r['layout']
+    if r.get('cover', False):
+        metadata['cover'] = r.get('cover')
+    return metadata
+
 def migrate(entry):
     '''
     type Shout {
@@ -66,7 +77,7 @@ def migrate(entry):
         'views': entry.get('views', 0),
         'rating': entry.get('rating', 0),
         'ratings': [],
-        'createdAt': '2016-03-05 22:22:00.350000'
+        'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')
     }
     r['slug'] = entry.get('slug', '')
     body_orig = entry.get('body', '')
@@ -78,6 +89,10 @@ def migrate(entry):
         print('NO SLUG ERROR')
         # print(entry)
         raise Exception
+    try:
+        r['topics'].append(topics_dict[entry['category']]['slug'])
+    except Exception:
+        print(entry['category'])
     if entry.get('image') is not None:
         r['cover'] = entry['image']['url']
     if entry.get('thumborId') is not None:
@@ -99,16 +114,16 @@ def migrate(entry):
     else:
         print(r['slug'] + ': literature has no media')
     elif entry.get('type') == 'Video':
-        m = entry['media'][0]
-        yt = m.get('youtubeId', '')
-        vm = m.get('vimeoId', '')
-        videoUrl = 'https://www.youtube.com/watch?v=' + yt if yt else '#'
-        if videoUrl == '#':
-            videoUrl = 'https://vimeo.com/' + vm if vm else '#'
-        if videoUrl == '#':
-            print(entry.get('media', 'NO MEDIA!'))
-            # raise Exception
-        r['body'] = '' + html2text(m.get('body', '')) # FIXME
     elif entry.get('type') == 'Music':
         r['body'] = '' + html2text(entry.get('body', '')).replace('<p>', '').replace('</p>', '')
     # else:
     ext = 'md'
-    content = dumped
     open('migration/content/' +
-         metadata['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
-
+         r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
     try:
         shout_dict = r.copy()
         shout_dict['authors'] = [user, ]
         if entry.get('createdAt') is not None:
-            shout_dict['createdAt'] = parse(entry.get('createdAt'))
+            shout_dict['createdAt'] = parse(r.get('createdAt'))
         else:
             shout_dict['createdAt'] = ts
         if entry.get('published'):
@@ -196,9 +204,9 @@
         else:
             shout_dict['publishedAt'] = ts
         del shout_dict['published']
-        del shout_dict['views'] # FIXME
-        del shout_dict['rating'] # FIXME
-        del shout_dict['ratings']
+        # del shout_dict['views']
+        # del shout_dict['rating']
+        del shout_dict['ratings'] # FIXME
         try:
             s = Shout.create(**shout_dict)
             r['id'] = s.id
@@ -209,4 +217,4 @@
         print(r)
         # print(s)
         raise Exception
-    return (r, content)
+    return r
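-- 
Note on the new export path (trailer text, not applied by git am): shouts() now
builds each exported markdown file as a front-matter header plus the shout body,
using the get_metadata() helper this patch adds to migration/tables/content_items.py
together with the python-frontmatter package. A minimal standalone sketch of that
flow follows; the sample shout dict is hypothetical, and the ts fallback from the
real helper is omitted here:

    import frontmatter  # pip install python-frontmatter

    def get_metadata(r):
        # same shape as the helper added by this patch
        metadata = {}
        metadata['title'] = r.get('title')
        metadata['authors'] = r.get('authors')
        metadata['createdAt'] = r.get('createdAt')
        metadata['layout'] = r['layout']
        if r.get('cover', False):
            metadata['cover'] = r.get('cover')
        return metadata

    # hypothetical sample record, shaped like the dicts migrate() returns
    shout = {
        'slug': 'example-shout',
        'title': 'Example',
        'authors': [{'slug': 'discours.io'}],
        'createdAt': '2016-03-05 22:22:00.350000',
        'layout': 'article',
        'body': 'Body text in markdown.',
    }

    metadata = get_metadata(shout)
    # YAML front-matter block followed by the body, ready to be written to
    # '../content/discours.io/' + shout['slug'] + '.md'
    content = frontmatter.dumps(frontmatter.Post(shout['body'], **metadata))
    print(content)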
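The patch also routes every json.dumps call through migration.utils.DateTimeEncoder,
which is not part of this diff. For context, a minimal encoder of the kind that
module presumably provides (an assumption, not the actual migration/utils.py code):

    import json
    import datetime

    class DateTimeEncoder(json.JSONEncoder):
        # serialize datetime values, which json.dumps rejects by default;
        # sketch only -- the real class lives in migration/utils.py
        def default(self, obj):
            if isinstance(obj, (datetime.datetime, datetime.date)):
                return obj.isoformat()
            return super().default(obj)

    print(json.dumps({'createdAt': datetime.datetime(2016, 3, 5)}, cls=DateTimeEncoder))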