migration-orm-fixes

tonyrewin 2022-07-07 16:55:13 +03:00
parent bd4221e9af
commit 56dcd7ecbc
23 changed files with 706 additions and 799 deletions

.gitignore

@@ -142,4 +142,6 @@ migration/content/**/*.md
*.zip
*.sqlite3
*.rdb
.DS_Store
dump
.vscode

.vscode/settings.json

@@ -1,11 +0,0 @@
{
"sqltools.connections": [
{
"previewLimit": 50,
"driver": "SQLite",
"database": "${workspaceFolder:discours-backend}/db.sqlite3",
"name": "local-discours-backend"
}
],
"sqltools.useNodeRuntime": true
}

View File

@@ -1,339 +1,229 @@
''' cmd managed migration '''
import json
import frontmatter
from migration.extract import extract
from migration.export import export_email_subscriptions, export_mdx, export_slug
from migration.tables.users import migrate as migrateUser
from migration.tables.users import migrate_2stage as migrateUser_2stage
from migration.tables.users import migrate_email_subscription
from migration.tables.content_items import get_metadata, migrate as migrateShout
from migration.tables.content_item_categories import migrate as migrateCategory
from migration.tables.tags import migrate as migrateTag
from migration.tables.content_items import get_shout_slug, migrate as migrateShout
from migration.tables.topics import migrate as migrateTopic
from migration.tables.comments import migrate as migrateComment
from migration.tables.comments import migrate_2stage as migrateComment_2stage
from migration.utils import DateTimeEncoder
from orm import Community, Topic
from dateutil.parser import parse as date_parse
from orm.base import local_session
from orm import User
OLD_DATE = '2016-03-05 22:22:00.350000'
def users(users_by_oid, users_by_slug, users_data):
def users_handle(storage):
''' migrating users first '''
# limiting
limit = len(users_data)
if len(sys.argv) > 2: limit = int(sys.argv[2])
print('[migration] %d users...' % limit)
counter = 0
id_map = {}
for entry in users_data:
print('[migration] migrating %d users' %(len(storage['users']['data'])))
for entry in storage['users']['data']:
oid = entry['_id']
user = migrateUser(entry)
users_by_oid[oid] = user # full
storage['users']['by_oid'][oid] = user # full
del user['password']
del user['notifications']
# del user['oauth']
del user['emailConfirmed']
del user['username']
del user['email']
users_by_slug[user['slug']] = user # public
id_map[user['old_id']] = user['slug']
storage['users']['by_slug'][user['slug']] = user # public
id_map[user['oid']] = user['slug']
counter += 1
# print(' - * - stage 2 users migration - * -')
ce = 0
for entry in users_data:
for entry in storage['users']['data']:
ce += migrateUser_2stage(entry, id_map)
# print(str(len(users_by_slug.items())) + ' users migrated')
print('[migration] %d user ratings errors' % ce)
#try:
# open('migration/data/users.old_id.json', 'w').write(json.dumps(users_by_oid, cls=DateTimeEncoder)) # NOTE: by old_id
# open('migration/data/users.slug.json', 'w').write(json.dumps(users_by_slug, cls=DateTimeEncoder)) # NOTE: by slug
#except Exception:
# print('json dump error')
# # print(users_by_oid)
def topics(export_topics, topics_by_slug, topics_by_oid, cats_data, tags_data):
def topics_handle(storage):
''' topics from categories and tags '''
# limiting
limit = len(cats_data) + len(tags_data)
if len(sys.argv) > 2: limit = int(sys.argv[2])
print('[migration] %d topics...' % limit)
counter = 0
retopics = json.loads(open('migration/tables/replacements.json').read())
topicslugs_by_oid = {}
for tag in tags_data:
topicslugs_by_oid[tag['_id']] = tag['slug']
oldid = tag['_id']
tag['slug'] = retopics.get(tag['slug'], tag['slug'])
topic = migrateTag(tag, topics_by_oid)
topics_by_oid[oldid] = topic
topics_by_slug[topic['slug']] = topic
counter += 1
for cat in cats_data:
topicslugs_by_oid[cat['_id']] = cat['slug']
if not cat.get('hidden'):
oldid = cat['_id']
cat['slug'] = retopics.get(cat['slug'], cat['slug'])
try: topic = migrateCategory(cat, topics_by_oid)
except Exception as e: raise e
topics_by_oid[oldid] = topic
topic['slug'] = retopics.get(topic['slug'], topic['slug'])
topics_by_slug[topic['slug']] = topic
for t in (storage['topics']['tags'] + storage['topics']['cats']):
if t['slug'] in storage['replacements']:
t['slug'] = storage['replacements'][t['slug']]
topic = migrateTopic(t)
storage['topics']['by_oid'][t['_id']] = topic
storage['topics']['by_slug'][t['slug']] = topic
counter += 1
for oid, oslug in topicslugs_by_oid.items():
if topics_by_slug.get(oslug):
topics_by_oid[oid] = topics_by_slug.get(retopics.get(oslug, oslug))
print( '[migration] ' + str(len(topics_by_oid.values())) + ' topics by oid' )
print( '[migration] ' + str(len(topics_by_slug.values())) + ' topics by slug' )
#replacements = {} # json.loads(open('migration/tables/replacements.json').read())
#for t in topics_by_title.values():
# slug = replacements.get(t['slug'].strip()) or t['slug'].strip()
# topics_by_slug[slug] = t
export_topics = topics_by_slug
#for i in topicslugs:
# export_topics[i] = i
#open('migration/tables/replacements2.json', 'w').write(json.dumps(export_topics,
# cls=DateTimeEncoder,
# indent=4,
# sort_keys=True,
# ensure_ascii=False))
else:
print('[migration] topic ' + t['slug'] + ' ignored')
for oldslug, newslug in storage['replacements'].items():
if oldslug != newslug and oldslug in storage['topics']['by_slug']:
oid = storage['topics']['by_slug'][oldslug]['_id']
del storage['topics']['by_slug'][oldslug]
storage['topics']['by_oid'][oid] = storage['topics']['by_slug'][newslug]
print( '[migration] ' + str(counter) + ' topics migrated')
print( '[migration] ' + str(len(storage['topics']['by_oid'].values())) + ' topics by oid' )
print( '[migration] ' + str(len(storage['topics']['by_slug'].values())) + ' topics by slug' )
# raise Exception
def shouts(content_data, shouts_by_slug, shouts_by_oid):
def shouts_handle(storage):
''' migrating content items one by one '''
# limiting
limit = len(content_data)
if len(sys.argv) > 2: limit = int(sys.argv[2])
print('[migration] %d content items...' % limit)
counter = 0
discours_author = 0
errored = []
pub_counter = 0
# limiting
try: limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data)
except ValueError: limit = len(content_data)
for entry in content_data[:limit]:
if 'slug' in sys.argv and entry['slug'] not in sys.argv: continue
try:
shout, terrors = migrateShout(entry, users_by_oid, topics_by_oid)
if entry.get('published'): pub_counter += 1
author = shout['authors'][0]
shout['authors'] = [ author.id, ]
newtopics = []
retopics = json.loads(open('migration/tables/replacements.json').read())
for slug in shout['topics']:
nt = retopics.get(slug, slug)
if nt not in newtopics:
newtopics.append(nt)
shout['topics'] = newtopics
shouts_by_slug[shout['slug']] = shout
shouts_by_oid[entry['_id']] = shout
line = str(counter+1) + ': ' + shout['slug'] + " @" + str(author.slug)
counter += 1
if author.slug == 'discours': discours_author += 1
print(line)
# open('./shouts.id.log', 'a').write(line + '\n')
except Exception as e:
# print(entry['_id'])
errored.append(entry)
raise e
# print(te)
# open('migration/data/shouts.old_id.json','w').write(json.dumps(shouts_by_oid, cls=DateTimeEncoder))
# open('migration/data/shouts.slug.json','w').write(json.dumps(shouts_by_slug, cls=DateTimeEncoder))
for entry in storage['shouts']['data']:
oid = entry['_id']
# slug
slug = get_shout_slug(entry)
# single slug mode
if '-' in sys.argv and slug not in sys.argv: continue
# migrate
shout = migrateShout(entry, storage)
# shouts.topics
if not shout['topics']: print('[migration] no topics!')
# with author
author = shout['authors'][0].slug
if author =='discours': discours_author += 1
# print('[migration] ' + shout['slug'] + ' with author ' + author)
if entry.get('published'):
export_mdx(shout)
pub_counter += 1
# print main counter
counter += 1
line = str(counter+1) + ': ' + shout['slug'] + " @" + author
print(line)
print('[migration] ' + str(counter) + ' content items were migrated')
print('[migration] ' + str(pub_counter) + ' have been published')
print('[migration] ' + str(discours_author) + ' authored by @discours')
def export_shouts(shouts_by_slug, export_articles, export_authors, content_dict):
# update what was just migrated or load json again
if len(export_authors.keys()) == 0:
export_authors = json.loads(open('../src/data/authors.json').read())
print('[migration] ' + str(len(export_authors.items())) + ' exported authors loaded')
if len(export_articles.keys()) == 0:
export_articles = json.loads(open('../src/data/articles.json').read())
print('[migration] ' + str(len(export_articles.items())) + ' exported articles loaded')
# limiting
limit = 33
if len(sys.argv) > 2: limit = int(sys.argv[2])
print('[migration] ' + 'exporting %d articles to json...' % limit)
# filter
export_list = [i for i in shouts_by_slug.items() if i[1]['layout'] == 'article']
export_list = sorted(export_list, key=lambda item: item[1]['createdAt'] or OLD_DATE, reverse=True)
print('[migration] ' + str(len(export_list)) + ' filtered')
export_list = export_list[:limit or len(export_list)]
for (slug, article) in export_list:
if article['layout'] == 'article':
export_slug(slug, export_articles, export_authors, content_dict)
def export_body(article, content_dict):
article['body'] = extract(article['body'], article['oid'])
metadata = get_metadata(article)
content = frontmatter.dumps(frontmatter.Post(article['body'], **metadata))
open('../discoursio-web/content/' + article['slug'] + '.mdx', 'w').write(content)
open('../discoursio-web/content/'+ article['slug'] + '.html', 'w').write(content_dict[article['old_id']]['body'])
def export_slug(slug, export_articles, export_authors, content_dict):
print('[migration] ' + 'exporting %s ' % slug)
if export_authors == {}:
export_authors = json.loads(open('../src/data/authors.json').read())
print('[migration] ' + str(len(export_authors.items())) + ' exported authors loaded')
if export_articles == {}:
export_articles = json.loads(open('../src/data/articles.json').read())
print('[migration] ' + str(len(export_articles.items())) + ' exported articles loaded')
shout = shouts_by_slug.get(slug, False)
assert shout, 'no data error'
author = users_by_slug.get(shout['authors'][0]['slug'], None)
export_authors.update({shout['authors'][0]['slug']: author})
export_articles.update({shout['slug']: shout})
export_body(shout, content_dict)
comments([slug, ])
def comments(comments_data):
def comments_handle(storage):
id_map = {}
for comment in comments_data:
comment = migrateComment(comment, shouts_by_oid)
ignored_counter = 0
for oldcomment in storage['comments']['data']:
comment = migrateComment(oldcomment, storage)
if not comment:
print('[migration] comment ignored \n%r\n' % oldcomment)
ignored_counter += 1
continue
id = comment.get('id')
old_id = comment.get('old_id')
id_map[old_id] = id
for comment in comments_data:
migrateComment_2stage(comment, id_map)
print('[migration] ' + str(len(id_map)) + ' comments exported')
def export_email_subscriptions():
email_subscriptions_data = json.loads(open('migration/data/email_subscriptions.json').read())
print('[migration] ' + str(len(email_subscriptions_data)) + ' email subscriptions loaded')
for data in email_subscriptions_data:
migrate_email_subscription(data)
print('[migration] ' + str(len(email_subscriptions_data)) + ' email subscriptions exported')
def export_finish(export_articles = {}, export_authors = {}, export_topics = {}, export_comments = {}):
open('../src/data/authors.json', 'w').write(json.dumps(export_authors,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print('[migration] ' + str(len(export_authors.items())) + ' authors exported')
open('../src/data/topics.json', 'w').write(json.dumps(export_topics,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print('[migration] ' + str(len(export_topics.keys())) + ' topics exported')
oid = comment.get('oid')
id_map[oid] = id
for comment in storage['comments']['data']: migrateComment_2stage(comment, id_map)
print('[migration] ' + str(len(id_map)) + ' comments migrated')
print('[migration] ' + str(ignored_counter) + ' comments ignored')
open('../src/data/articles.json', 'w').write(json.dumps(export_articles,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print('[migration] ' + str(len(export_articles.items())) + ' articles exported')
open('../src/data/comments.json', 'w').write(json.dumps(export_comments,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print('[migration] ' + str(len(export_comments.items())) + ' exported articles with comments')
def bson_handle():
# decode bson # preparing data
from migration import bson2json
bson2json.json_tables()
def export_one(slug, storage):
topics_handle(storage)
users_handle(storage)
shouts_handle(storage)
export_slug(slug, storage)
def all_handle(storage):
print('[migration] everything!')
users_handle(storage)
topics_handle(storage)
shouts_handle(storage)
comments_handle(storage)
export_email_subscriptions()
print('[migration] everything done!')
def data_load():
storage = {
'content_items': {
'by_oid': {},
'by_slug': {},
},
'shouts': {
'by_oid': {},
'by_slug': {},
'data': []
},
'comments': {
'by_oid': {},
'by_slug': {},
'by_content': {},
'data': []
},
'topics': {
'by_oid': {},
'by_slug': {},
'cats': [],
'tags': [],
},
'users': {
'by_oid': {},
'by_slug': {},
'data': []
},
'replacements': json.loads(open('migration/tables/replacements.json').read())
}
users_data = []
tags_data = []
cats_data = []
comments_data = []
content_data = []
try:
users_data = json.loads(open('migration/data/users.json').read())
print('[migration] ' + str(len(users_data)) + ' users loaded')
tags_data = json.loads(open('migration/data/tags.json').read())
storage['topics']['tags'] = tags_data
print('[migration] ' + str(len(tags_data)) + ' tags loaded')
cats_data = json.loads(open('migration/data/content_item_categories.json').read())
storage['topics']['cats'] = cats_data
print('[migration] ' + str(len(cats_data)) + ' cats loaded')
comments_data = json.loads(open('migration/data/comments.json').read())
storage['comments']['data'] = comments_data
print('[migration] ' + str(len(comments_data)) + ' comments loaded')
content_data = json.loads(open('migration/data/content_items.json').read())
storage['shouts']['data'] = content_data
print('[migration] ' + str(len(content_data)) + ' content items loaded')
# fill out storage
for x in users_data:
storage['users']['by_oid'][x['_id']] = x
# storage['users']['by_slug'][x['slug']] = x
# no user.slug yet
print('[migration] ' + str(len(storage['users']['by_oid'].keys())) + ' users by oid')
for x in tags_data:
storage['topics']['by_oid'][x['_id']] = x
storage['topics']['by_slug'][x['slug']] = x
for x in cats_data:
storage['topics']['by_oid'][x['_id']] = x
storage['topics']['by_slug'][x['slug']] = x
print('[migration] ' + str(len(storage['topics']['by_slug'].keys())) + ' topics by slug')
for item in content_data:
slug = get_shout_slug(item)
storage['content_items']['by_slug'][slug] = item
storage['content_items']['by_oid'][item['_id']] = item
print('[migration] ' + str(len(content_data)) + ' content items')
for x in comments_data:
storage['comments']['by_oid'][x['_id']] = x
cid = x['contentItem']
storage['comments']['by_content'][cid] = x
ci = storage['content_items']['by_oid'].get(cid, {})
if 'slug' in ci: storage['comments']['by_slug'][ci['slug']] = x
print('[migration] ' + str(len(storage['comments']['by_content'].keys())) + ' with comments')
except Exception as e: raise e
storage['users']['data'] = users_data
storage['topics']['tags'] = tags_data
storage['topics']['cats'] = cats_data
storage['shouts']['data'] = content_data
storage['comments']['data'] = comments_data
return storage
if __name__ == '__main__':
import sys
if len(sys.argv) > 1:
cmd = sys.argv[1]
if cmd == "bson":
# decode bson
from migration import bson2json
bson2json.json_tables()
else:
# preparing data
# users
users_data = json.loads(open('migration/data/users.json').read())
print('[migration] ' + str(len(users_data)) + ' users loaded')
users_by_oid = {}
users_by_slug = {}
user_id_map = {}
with local_session() as session:
users_list = session.query(User).all()
for user in users_list:
user_id_map[user.old_id] = user.id
users_by_oid[user.old_id] = vars(user)
# tags
tags_data = json.loads(open('migration/data/tags.json').read())
print('[migration] ' + str(len(tags_data)) + ' tags loaded')
# cats
cats_data = json.loads(open('migration/data/content_item_categories.json').read())
print('[migration] ' + str(len(cats_data)) + ' cats loaded')
topics_data = tags_data
tags_data.extend(cats_data)
oldtopics_by_oid = { x['_id']: x for x in topics_data }
oldtopics_by_slug = { x['slug']: x for x in topics_data }
topics_by_oid = {}
topics_by_slug = {}
# content
content_data = json.loads(open('migration/data/content_items.json').read())
content_dict = { x['_id']: x for x in content_data }
print('[migration] ' + str(len(content_data)) + ' content items loaded')
shouts_by_slug = {}
shouts_by_oid = {}
comments_data = json.loads(open('migration/data/comments.json').read())
print('[migration] ' + str(len(comments_data)) + ' comments loaded')
comments_by_post = {}
# sort comments by old posts ids
for old_comment in comments_data:
cid = old_comment['contentItem']
comments_by_post[cid] = comments_by_post.get(cid, [])
if not old_comment.get('deletedAt', True):
comments_by_post[cid].append(old_comment)
print('[migration] ' + str(len(comments_by_post.keys())) + ' articles with comments')
export_articles = {} # slug: shout
export_authors = {} # slug: user
export_comments = {} # shout-slug: comment[] (list)
export_topics = {} # slug: topic
##################### COMMANDS ##########################3
if cmd == "users":
users(users_by_oid, users_by_slug, users_data)
elif cmd == "topics":
topics(export_topics, topics_by_slug, topics_by_oid, cats_data, tags_data)
elif cmd == "shouts":
shouts(content_data, shouts_by_slug, shouts_by_oid) # NOTE: listens limit
elif cmd == "comments":
comments(comments_data)
elif cmd == "export_shouts":
export_shouts(shouts_by_slug, export_articles, export_authors, content_dict)
elif cmd == "email_subscriptions":
export_email_subscriptions()
elif cmd == 'slug':
export_slug(sys.argv[2], export_articles, export_authors, content_dict)
elif cmd == "all":
users(users_by_oid, users_by_slug, users_data)
topics(export_topics, topics_by_slug, topics_by_oid, cats_data, tags_data)
shouts(content_data, shouts_by_slug, shouts_by_oid)
comments(comments_data)
export_email_subscriptions()
else:
print('[migration] --- debug users, topics, shouts')
users(users_by_oid, users_by_slug, users_data)
topics(export_topics, topics_by_slug, topics_by_oid, cats_data, tags_data)
shouts(content_data, shouts_by_slug, shouts_by_oid)
#export_finish(export_articles, export_authors, export_topics, export_comments)
cmd = sys.argv[1]
print('[migration] command: ' + cmd)
if cmd == 'bson':
bson_handle()
else:
storage = data_load()
if cmd == '-': export_one(sys.argv[2], storage)
else: all_handle(storage)
else:
print('usage: python migrate.py bson')
print('.. \ttopics <limit>')
print('.. \tusers <limit>')
print('.. \tshouts <limit>')
print('.. \texport_shouts <limit>')
print('.. \tslug <slug>')
print('.. \t- <slug>')
print('.. \tall')
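For reference, a sketch of how the new dispatcher is driven. Only bson and - are routed explicitly; any other argument, including the per-table commands still listed in the help text above, falls through to all_handle:

    python migrate.py bson          # decode dump/discours/*.bson into migration/data/*.json
    python migrate.py - some-slug   # data_load(), then migrate and export one shout (slug is hypothetical)
    python migrate.py all           # data_load(), then the full users/topics/shouts/comments pipeline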

View File

@@ -1,3 +1,4 @@
import os
import bson
import json
@@ -17,10 +18,11 @@ def json_tables():
lc = []
with open('migration/data/'+table+'.bson', 'rb') as f:
bs = f.read()
f.close()
base = 0
while base < len(bs):
base, d = bson.decode_document(bs, base)
lc.append(d)
data[table] = lc
open('dump/discours/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))
open(os.getcwd() + '/dump/discours/'+table+'.json', 'w').write(json.dumps(lc,cls=DateTimeEncoder))

migration/export.py (new file)

@@ -0,0 +1,100 @@
from datetime import datetime
import json
import os
import frontmatter
from migration.extract import prepare_body
from migration.tables.users import migrate_email_subscription
from migration.utils import DateTimeEncoder
OLD_DATE = '2016-03-05 22:22:00.350000'
EXPORT_DEST = '../discoursio-web/data/'
parentDir = '/'.join(os.getcwd().split('/')[:-1])
contentDir = parentDir + '/discoursio-web/content/'
ts = datetime.now()
def get_metadata(r):
authors = []
for a in r['authors']:
authors.append({ # a short version for public listings
'slug': a.slug or 'discours',
'name': a.name or 'Дискурс',
'userpic': a.userpic or 'https://discours.io/static/img/discours.png'
})
metadata = {}
metadata['title'] = r.get('title', '').replace('{', '(').replace('}', ')')
metadata['authors'] = authors
metadata['createdAt'] = r.get('createdAt', ts)
metadata['layout'] = r['layout']
metadata['topics'] = [topic for topic in r['topics']]
metadata['topics'].sort()
if r.get('cover', False): metadata['cover'] = r.get('cover')
return metadata
def export_mdx(r):
# print('[export] mdx %s' % r['slug'])
content = ''
metadata = get_metadata(r)
content = frontmatter.dumps(frontmatter.Post(r['body'], **metadata))
ext = 'mdx'
filepath = contentDir + r['slug']
bc = bytes(content,'utf-8').decode('utf-8','ignore')
open(filepath + '.' + ext, 'w').write(bc)
def export_body(shout, storage):
shout['body'] = prepare_body(storage['content_items']['by_oid'][shout['oid']])
export_mdx(shout)
print('[export] trying to save html %s' % shout['slug'])
open(contentDir + shout['slug'] + '.html', 'w').write(storage['content_items']['by_oid'][shout['oid']]['body'])
def export_slug(slug, storage):
shout = storage['shouts']['by_slug'].get(slug)
assert shout, '[export] no shout found by slug: %s ' % slug
author = storage['users']['by_slug'].get(shout['authors'][0]['slug'])
assert author, '[export] no author error'
export_body(shout, storage)
def export_email_subscriptions():
email_subscriptions_data = json.loads(open('migration/data/email_subscriptions.json').read())
for data in email_subscriptions_data:
migrate_email_subscription(data)
print('[migration] ' + str(len(email_subscriptions_data)) + ' email subscriptions exported')
def export_shouts(storage):
# update what was just migrated or load json again
if len(storage['users']['by_slug'].keys()) == 0:
storage['users']['by_slug'] = json.loads(open(EXPORT_DEST + 'authors.json').read())
print('[migration] ' + str(len(storage['users']['by_slug'].keys())) + ' exported authors loaded')
if len(storage['shouts']['by_slug'].keys()) == 0:
storage['shouts']['by_slug'] = json.loads(open(EXPORT_DEST + 'articles.json').read())
print('[migration] ' + str(len(storage['shouts']['by_slug'].keys())) + ' exported articles loaded')
for slug in storage['shouts']['by_slug'].keys(): export_slug(slug, storage)
def export_json(export_articles = {}, export_authors = {}, export_topics = {}, export_comments = {}):
open(EXPORT_DEST + 'authors.json', 'w').write(json.dumps(export_authors,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print('[migration] ' + str(len(export_authors.items())) + ' authors exported')
open(EXPORT_DEST + 'topics.json', 'w').write(json.dumps(export_topics,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print('[migration] ' + str(len(export_topics.keys())) + ' topics exported')
open(EXPORT_DEST + 'articles.json', 'w').write(json.dumps(export_articles,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print('[migration] ' + str(len(export_articles.items())) + ' articles exported')
open(EXPORT_DEST + 'comments.json', 'w').write(json.dumps(export_comments,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print('[migration] ' + str(len(export_comments.items())) + ' exported articles with comments')
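A minimal sketch of the frontmatter round-trip that export_mdx performs; the metadata keys are the ones get_metadata assembles above, the sample values are hypothetical:

    import frontmatter

    metadata = {
        'title': 'Sample title',
        'authors': [{'slug': 'discours', 'name': 'Дискурс', 'userpic': ''}],
        'layout': 'article',
        'topics': ['kultura']
    }
    post = frontmatter.Post('Body text in markdown', **metadata)
    print(frontmatter.dumps(post))  # YAML frontmatter block followed by the body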

View File

@@ -1,16 +1,16 @@
import json
import os
import re
import base64
import sys
from migration.html2text import html2text
TOOLTIP_REGEX = r'(\/\/\/(.+)\/\/\/)'
contentDir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'discoursio-web', 'content')
s3 = 'https://discours-io.s3.amazonaws.com/'
cdn = 'https://assets.discours.io'
retopics = json.loads(open('migration/tables/replacements.json', 'r').read())
def replace_tooltips(body):
# FIXME: if you prefer regexp
newbody = body
matches = list(re.finditer(TOOLTIP_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
for match in matches:
@@ -21,37 +21,40 @@ def replace_tooltips(body):
def place_tooltips(body):
parts = body.split('///')
parts = body.split('&&&')
l = len(parts)
newparts = list(parts)
placed = False
if l & 1:
if l > 1:
i = 1
print('[extract] found %d tooltips' % (l-1))
for part in parts[1:]:
if i & 1:
# print([ len(p) for p in parts ])
# print('[extract] tooltip: ' + part)
if 'a class="footnote-url" href=' in part:
print('[extract] footnote: ' + part)
fn = 'a class="footnote-url" href="'
link = part.split(fn,1)[1].split('"', 1)[0]
extracted_part = part.split(fn,1)[0] + ' ' + part.split('/', 1)[-1]
newparts[i] = '<Tooltip' + (' link="' + link + '" ' if link else '') + '>' + extracted_part + '</Tooltip>'
else:
newparts[i] = '<Tooltip>%s</Tooltip>' % part
# print('[extract] tooltip: ' + newparts[i])
else:
# print('[extract] pass: ' + part[:10] + '..')
newparts[i] = part
i += 1
return ''.join(newparts)
placed = True
return (''.join(newparts), placed)
IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}=|[A-Za-z\d+\/]{2}==)))\)"
public = '../discoursio-web/public'
cache = {}
def reextract_images(body, oid):
# FIXME: if you prefer regexp
matches = list(re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE))[1:]
i = 0
for match in matches:
@@ -80,54 +83,50 @@ IMAGES = {
sep = ';base64,'
def extract_imageparts(bodyparts, prefix):
# recursive loop
for current in bodyparts:
i = bodyparts.index(current)
for mime in IMAGES.keys():
if mime == current[-len(mime):] and (i + 1 < len(bodyparts)):
print('[extract] ' + mime)
next = bodyparts[i+1]
ext = IMAGES[mime]
b64end = next.index(')')
b64encoded = next[:b64end]
name = prefix + '-' + str(len(cache))
link = '/upload/image-' + name + '.' + ext
print('[extract] name: ' + name)
print('[extract] link: ' + link)
print('[extract] %d bytes' % len(b64encoded))
if b64encoded not in cache:
try:
content = base64.b64decode(b64encoded + '==')
open(public + link, 'wb').write(content)
print('[extract] ' +str(len(content)) + ' image bytes been written')
cache[b64encoded] = name
except:
raise Exception
# raise Exception('[extract] error decoding image %r' %b64encoded)
else:
print('[extract] cached: ' + cache[b64encoded])
name = cache[b64encoded]
link = cdn + '/upload/image-' + name + '.' + ext
bodyparts[i] = current[:-len(mime)] + current[-len(mime):] + link + next[-b64end:]
bodyparts[i+1] = next[:-b64end]
break
return extract_imageparts(sep.join(bodyparts[i+1:]), prefix) \
if len(bodyparts) > (i + 1) else ''.join(bodyparts)
def extract_images(body, oid):
newbody = ''
body = body.replace(' [](data:image', '![](data:image').replace('\n[](data:image', '![](data:image')
oldparts = body.split(sep)
newparts = list(oldparts)
# print()
if len(oldparts) > 1:
print('[extract] images for %s' % oid)
print('[extract] %d candidates' % (len(oldparts)-1))
i = 0
for current in oldparts:
next = ''
try: next = oldparts[i+1]
except: newbody += current
start = oldparts.index(current) == 0
end = not next
if end:
continue
else: # start or between
for mime in IMAGES.keys():
if mime in current[-15:]:
print('[extract] ' + current[-15:])
if ')' in next:
b64encoded = next.split(')')[0]
print('[extract] '+str(i+1)+': %d bytes' % len(b64encoded))
ext = IMAGES[mime]
print('[extract] type: ' + mime)
name = oid + '-' + str(i)
print('[extract] name: ' + name)
link = '/upload/image-' + name + '.' + ext
print('[extract] link: ' + link)
if b64encoded:
if b64encoded not in cache:
content = base64.b64decode(b64encoded + '==')
open(public + link, 'wb').write(content)
cache[b64encoded] = name
else:
print('[extract] cached: ' + cache[b64encoded])
name = cache[b64encoded]
link = cdn + '/upload/image-' + name + '.' + ext
newparts[i] = current.split('![](' + mime)[0] + '![](' + link + ')'
newparts[i+1] = next.replace(b64encoded + ')', '')
else:
print('[extract] ERROR: no b64encoded')
# print(current[-15:])
i += 1
newbody = ''.join(newparts)
body = body\
.replace(' [](data:image', '![](data:image')\
.replace('\n[](data:image', '![](data:image')
parts = body.split(sep)
i = 0
if len(parts) > 1: newbody = extract_imageparts(parts, oid)
else: newbody = body
return newbody
@@ -149,25 +148,34 @@ def cleanup(body):
return newbody
def extract(body, oid):
newbody = extract_images(body, oid)
newbody = cleanup(newbody)
newbody = place_tooltips(newbody)
return newbody
if body:
newbody = extract_images(body, oid)
if not newbody: raise Exception('extract_images error')
newbody = cleanup(newbody)
if not newbody: raise Exception('cleanup error')
newbody, placed = place_tooltips(newbody)
if not newbody: raise Exception('place_tooltips error')
if placed:
newbody = 'import Tooltip from \'$/components/Article/Tooltip\'\n\n' + newbody
return newbody
return body
def prepare_body(entry):
# print('[migration] preparing body %s' % entry.get('slug',''))
# body modifications
body = ''
body_orig = entry.get('body', '')
if not body_orig: body_orig = ''
if entry.get('type') == 'Literature':
print('[extract] literature')
for m in entry.get('media', []):
t = m.get('title', '')
if t: body_orig += '<h5>' + t + '</h5>\n'
body_orig += (m.get('body', '') or '')
body_orig += '\n' + m.get('literatureBody', '') + '\n'
body_orig += (m.get('body') or '').replace((m.get('literatureBody') or ''), '') + m.get('literatureBody', '') + '\n'
elif entry.get('type') == 'Video':
print('[extract] embedding video')
providers = set([])
video_url = ''
require = False
@@ -187,8 +195,10 @@ def prepare_body(entry):
body += extract(html2text(m.get('body', '')), entry['_id'])
if video_url == '#': print(entry.get('media', 'UNKNOWN MEDIA PROVIDER!'))
if require: body = 'import { ' + ','.join(list(providers)) + ' } from \'solid-social\'\n\n' + body + '\n'
# already body_orig = entry.get('body', '')
elif entry.get('type') == 'Music':
print('[extract] music album')
for m in entry.get('media', []):
artist = m.get('performer')
trackname = ''
@@ -197,42 +207,46 @@ def prepare_body(entry):
body += '<MusicPlayer src=\"' + m.get('fileUrl','') + '\" title=\"' + trackname + '\" />\n'
body += extract(html2text(m.get('body', '')), entry['_id'])
body = 'import MusicPlayer from \'$/components/Article/MusicPlayer\'\n\n' + body + '\n'
# already body_orig = entry.get('body', '')
elif entry.get('type') == 'Image':
print('[extract] image gallery')
cover = ''
if 'thumborId' in entry: cover = cdn + '/unsafe/1600x/' + entry['thumborId']
if not cover and 'image' in entry:
cover = entry['image'].get('url', '')
if not cover:
if 'image' in entry: cover = entry['image'].get('url', '')
if 'cloudinary' in cover: cover = ''
else:
print('[migration] cover: ' + cover)
images = {}
for m in entry.get('media', []):
t = m.get('title', '')
if t: body += '#### ' + t + '\n'
u = m.get('image', {}).get('url', '')
if 'cloudinary' in u:
u = m.get('thumborId')
if not u: u = cover
b = ''
title = m.get('title','').replace('\n', ' ').replace('&nbsp;', ' ')
u = m.get('image', {}).get('url', '') or m.get('thumborId') or cover
u = str(u)
b += '<h4>' + title + '</h4>\n' + body_orig
if not u.startswith('http'): u = s3 + u
if not u: print('[extract] no image for ' + str(m))
if 'cloudinary' in u: u = 'img/lost.svg'
if u not in images.keys():
if u.startswith('production'): u = s3 + u
body += '![' + m.get('title','').replace('\n', ' ') + '](' + u + ')\n' # TODO: gallery here
images[u] = u
body += extract(html2text(m.get('body', '')), entry['_id']) + '\n'
# print('[extract] image: ' + u)
images[u] = title
b += '<img src=\"' + u + '\" alt=\"'+ title +'\" />\n'
b += m.get('body', '') + '\n'
body += extract(html2text(b), entry['_id'])
if not body_orig:
print('[prepare] using body history...')
# print(entry.get('bodyHistory', ''))
try:
for up in entry.get('bodyHistory', []):
body_orig = up.get('text', '') or ''
if body_orig: break
except: pass
elif not body_orig:
for up in entry.get('bodyHistory', []) or []:
body_orig = up.get('text', '') or ''
if body_orig:
print('[extract] body from history!')
break
if not body and not body_orig: print('[extract] error: EMPTY BODY')
# body_html = str(BeautifulSoup(body_orig, features="html.parser"))
body += extract(html2text(body_orig), entry['_id'])
# replace some topics
for oldtopicslug, newtopicslug in retopics.items():
body = body.replace(oldtopicslug, newtopicslug)
# print('[extract] adding original body')
if body_orig: body += extract(html2text(body_orig), entry['_id'])
if entry['slug'] in sys.argv:
open(contentDir + '/' + entry['slug'] + '.html', 'w')\
.write(entry.get('body',''))
return body
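A small check of the new tooltip flow, assuming the module import path; the &&& markers are the ones the html2text handler now emits (see the change below):

    from migration.extract import place_tooltips

    newbody, placed = place_tooltips('text &&&a footnote&&& more text')
    # newbody: 'text <Tooltip>a footnote</Tooltip> more text', placed: True
    # extract() prepends the Tooltip import to the body only when placed is True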

View File

@@ -535,8 +535,7 @@ class HTML2Text(html.parser.HTMLParser):
if start:
if 'data-original-title' in attrs:
# WARNING: old discours specific code
if 'import Tooltip' not in self.outtextlist[0]: self.outtextlist.insert(0, 'import Tooltip from "$/components/Article/Tooltip"\n\n')
self.o('///%s///' % attrs['data-original-title'])
self.o('&&&%s&&&' % attrs['data-original-title'])
else:
if (
"href" in attrs
@@ -1033,10 +1032,10 @@ class HTML2Text(html.parser.HTMLParser):
return result
def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = None) -> str:
if bodywidth is None:
bodywidth = config.BODY_WIDTH
h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
h = h.handle(html)
def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH) -> str:
h = html.strip() or ''
if h:
h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
h = h.handle(html.strip())
print('[html2text] %d bytes' % len(html))
return h
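The rewritten wrapper now short-circuits on empty input instead of instantiating the parser; a quick sanity check, assuming the package import path used elsewhere in this commit:

    from migration.html2text import html2text

    assert html2text('') == ''                   # empty markup returns an empty string
    md = html2text('<p>Hello <b>world</b></p>')  # markdown text; a '[html2text] N bytes' line is logged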

View File

@@ -7,7 +7,7 @@ UNICODE_SNOB = True
TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
# Escape all special characters. Output is less readable, but avoids
# corner case formatting issues.
ESCAPE_SNOB = False
ESCAPE_SNOB = True
# Put the links after each paragraph instead of at the end.
LINKS_EACH_PARAGRAPH = False
@@ -46,10 +46,10 @@ IMAGES_AS_HTML = False
IMAGES_TO_ALT = False
IMAGES_WITH_SIZE = False
IGNORE_EMPHASIS = False
MARK_CODE = False
MARK_CODE = True
DECODE_ERRORS = "strict"
DEFAULT_IMAGE_ALT = ""
PAD_TABLES = False
PAD_TABLES = True
# Convert links with same href and text to <href> format
# if they are absolute links

View File

@@ -1,12 +1,13 @@
from datetime import datetime
from dateutil.parser import parse as date_parse
import json
import datetime
from os.path import abspath
from orm import Shout, Comment, CommentRating, User
from orm import Comment, CommentRating, User
from orm.base import local_session
from migration.html2text import html2text
from orm.shout import Shout
def migrate(entry, shouts_by_oid):
ts = datetime.now()
def migrate(entry, storage):
'''
{
"_id": "hdtwS8fSyFLxXCgSC",
@@ -28,60 +29,70 @@ def migrate(entry, shouts_by_oid):
type Comment {
id: Int!
author: Int!
createdBy: User!
body: String!
replyTo: Int!
replyTo: Comment!
createdAt: DateTime!
updatedAt: DateTime
shout: Int!
shout: Shout!
deletedAt: DateTime
deletedBy: Int
deletedBy: User
ratings: [CommentRating]
views: Int
}
'''
if entry.get('deleted'): return
comment_dict = {}
# FIXME: comment_dict['createdAt'] = ts if not entry.get('createdAt') else date_parse(entry.get('createdAt'))
# print('[migration] comment original date %r' % entry.get('createdAt'))
# print('[migration] comment date %r ' % comment_dict['createdAt'])
comment_dict['body'] = html2text(entry.get('body', ''))
comment_dict['oid'] = entry['_id']
if entry.get('createdAt'): comment_dict['createdAt'] = date_parse(entry.get('createdAt'))
shout_oid = entry.get('contentItem')
if not shout_oid in storage['shouts']['by_oid']:
print('[migration] no shout for comment', entry)
else:
with local_session() as session:
author = session.query(User).filter(User.oid == entry['createdBy']).first()
shout_dict = storage['shouts']['by_oid'][shout_oid]
if shout_dict:
comment_dict['shout'] = shout_dict['oid']
comment_dict['createdBy'] = author.slug if author else 'discours'
# FIXME if entry.get('deleted'): comment_dict['deletedAt'] = date_parse(entry['updatedAt']) or ts
# comment_dict['deletedBy'] = session.query(User).filter(User.oid == (entry.get('updatedBy') or dd['oid'])).first()
# FIXME if entry.get('updatedAt'): comment_dict['updatedAt'] = date_parse(entry['updatedAt']) or ts
#for [k, v] in comment_dict.items():
# if not v: del comment_dict[f]
# if k.endswith('At'):
# try: comment_dict[k] = datetime(comment_dict[k])
# except: print(k)
# # print('[migration] comment keys:', f)
shout_old_id = entry['contentItem']
if not shout_old_id in shouts_by_oid:
return
shout = shouts_by_oid[shout_old_id]
with local_session() as session:
author = session.query(User).filter(User.old_id == entry['createdBy']).first()
comment_dict = {
'author': author.id if author else 0,
'createdAt': date_parse(entry['createdAt']),
'body': html2text(entry['body']),
'shout': shout["slug"]
}
if entry.get('deleted'):
comment_dict['deletedAt'] = date_parse(entry['updatedAt'])
comment_dict['deletedBy'] = str(entry['updatedBy'])
if entry.get('updatedAt'):
comment_dict['updatedAt'] = date_parse(entry['updatedAt'])
# comment_dict['updatedBy'] = str(entry.get('updatedBy', 0)) invalid keyword for Comment
# print(comment_dict)
comment = Comment.create(**comment_dict)
comment_dict['id'] = comment.id
comment_dict['ratings'] = []
comment_dict['old_id'] = entry['_id']
# print(comment)
for comment_rating_old in entry.get('ratings',[]):
rater = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first()
if rater and comment:
comment_rating_dict = {
'value': comment_rating_old['value'],
'createdBy': rater.slug,
'comment_id': comment.id
}
cts = comment_rating_old.get('createdAt')
if cts: comment_rating_dict['createdAt'] = date_parse(cts)
try:
comment_rating = CommentRating.create(**comment_rating_dict)
comment_dict['ratings'].append(comment_rating_dict)
except Exception as e:
print(comment_rating_dict)
raise e
comment = Comment.create(**comment_dict)
comment_dict['id'] = comment.id
comment_dict['ratings'] = []
comment_dict['oid'] = entry['_id']
# print(comment)
for comment_rating_old in entry.get('ratings',[]):
rater = session.query(User).filter(User.oid == comment_rating_old['createdBy']).first()
if rater and comment:
comment_rating_dict = {
'value': comment_rating_old['value'],
'createdBy': rater.slug,
'comment_id': comment.id
}
cts = comment_rating_old.get('createdAt')
if cts: comment_rating_dict['createdAt'] = date_parse(cts)
try:
CommentRating.create(**comment_rating_dict)
comment_dict['ratings'].append(comment_rating_dict)
except Exception as e:
print('[migration] comment rating error: %r' % comment_rating_dict)
raise e
else:
print('[migration] error: cannot find shout for comment %r' % comment_dict)
return comment_dict
def migrate_2stage(cmt, old_new_id):

View File

@@ -1,52 +0,0 @@
from orm.base import local_session
from orm import Topic, Community
from dateutil.parser import parse as date_parse
import json
from migration.html2text import html2text
import sqlalchemy
def migrate(entry, topics_by_oid):
'''
type Topic {
slug: String! # ID
createdBy: Int! # User
createdAt: DateTime!
value: String
children: [String] # children topic
}
'''
topic_dict = {
'slug': entry['slug'],
'oid': entry['_id'],
# 'createdBy': entry['createdBy'],
# 'createdAt': date_parse(entry['createdAt']),
'title': entry['title'].replace('&nbsp;', ' '), #.lower(),
'children': [],
'community' : Community.default_community.slug,
'body' : html2text(entry.get('description', '').replace('&nbsp;', ' '))
}
retopics = json.loads(open('migration/tables/replacements.json').read())
with local_session() as session:
slug = topics_by_oid.get(topic_dict['oid'], topic_dict)['slug']
if slug:
slug = retopics.get(slug, slug)
try:
topic = session.query(Topic).filter(Topic.slug == slug).first()
if not topic:
del topic_dict['oid']
topic = Topic.create(**topic_dict)
# print('created')
else:
if len(topic.title) > len(topic_dict['title']) or \
len(topic.body) < len(topic_dict['body']):
topic.update({
'slug': slug,
'title': topic_dict['title'] if len(topic.title) > len(topic_dict['title']) else topic.title,
'body': topic_dict['body'] if len(topic.body) < len(topic_dict['body']) else topic.body
})
except Exception as e:
print('not found old topic: ' + slug)
else:
raise Exception
topic_dict['oid'] = entry['_id']
return topic_dict

View File

@@ -1,23 +1,21 @@
from dateutil.parser import parse as date_parse
import frontmatter
import json
from orm import Shout, ShoutTopic, ShoutRating, ShoutViewByDay, User, shout
import sqlalchemy
from orm import Shout, ShoutTopic, ShoutRating, ShoutViewByDay, User
from transliterate import translit
from datetime import datetime
from orm.base import local_session
from orm.community import Community
from migration.extract import prepare_body
import os
from orm.community import Community
DISCOURS_USER = {
'id': 9999999,
'slug': 'discours',
'name': 'Дискурс',
'email': 'welcome@discours.io',
'userpic': 'https://discours.io/images/logo-mini.svg',
'createdAt': '2016-03-05 22:22:00.350000'
}
OLD_DATE = '2016-03-05 22:22:00.350000'
retopics = json.loads(open('migration/tables/replacements.json').read())
ts = datetime.now()
type2layout = {
'Article': 'article',
@@ -27,18 +25,6 @@ type2layout = {
'Image': 'image'
}
def get_metadata(r):
metadata = {}
metadata['title'] = r.get('title', '').replace('{', '(').replace('}', ')')
metadata['authors'] = r.get('authors')
metadata['createdAt'] = r.get('createdAt', ts)
metadata['layout'] = r['layout']
metadata['topics'] = [topic['slug'] for topic in r['topics']]
metadata['topics'].sort()
if r.get('cover', False):
metadata['cover'] = r.get('cover')
return metadata
def get_shout_slug(entry):
slug = entry.get('slug', '')
if not slug:
@@ -47,18 +33,51 @@ def get_shout_slug(entry):
if slug: break
return slug
def migrate(entry, users_by_oid, topics_by_oid):
def migrate(entry, storage):
# init, set title and layout
r = {
'layout': type2layout[entry['type']],
'title': entry['title'],
'community': Community.default_community.id,
'community': 0,
'authors': [],
'topics': [],
'rating': 0,
'ratings': [],
'createdAt': []
}
topics_by_oid = storage['topics']['by_oid']
users_by_oid = storage['users']['by_oid']
# author
oid = entry.get('createdBy', entry.get('_id', entry.get('oid')))
userdata = users_by_oid.get(oid)
if not userdata:
app = entry.get('application')
if app:
userslug = translit(app['name'], 'ru', reversed=True)\
.replace(' ', '-')\
.replace('\'', '')\
.replace('.', '-').lower()
userdata = {
'username': app['email'],
'email': app['email'],
'name': app['name'],
'bio': app.get('bio', ''),
'emailConfirmed': False,
'slug': userslug,
'createdAt': ts,
'wasOnlineAt': ts
}
else:
userdata = {
'name': 'Дискурс',
'slug': 'discours',
'email': 'welcome@discours.io',
'userpic': 'https://discours.io/image/logo-mini.svg'
}
assert userdata, 'no user found for %s from ' % [oid, len(users_by_oid.keys())]
r['authors'] = [userdata, ]
# slug
@@ -72,8 +91,7 @@ def migrate(entry, users_by_oid, topics_by_oid):
c = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId']
else:
c = entry.get('image', {}).get('url')
if not c or 'cloudinary' in c:
c = ''
if not c or 'cloudinary' in c: c = ''
r['cover'] = c
# timestamps
@@ -85,111 +103,105 @@ def migrate(entry, users_by_oid, topics_by_oid):
if r['publishedAt'] == OLD_DATE: r['publishedAt'] = ts
if 'deletedAt' in entry: r['deletedAt'] = date_parse(entry['deletedAt'])
# connected users' data
# r['deletedBy'] = entry.get('deletedBy', '0') # TypeError: 'deletedBy' is an invalid keyword argument for Shout
oid = entry.get('createdBy', '')
userdata = users_by_oid.get(oid, {})
if not userdata.get('slug'):
app = entry.get('application')
if app:
userslug = translit(app['name'], 'ru', reversed=True).replace(' ', '-').replace('\'', '').replace('.', '-').lower()
userdata = {
'username': app['email'],
'email': app['email'],
'name': app['name'],
'bio': app.get('bio', ''),
'emailConfirmed': False,
'slug': userslug,
'createdAt': ts,
'wasOnlineAt': ts
}
if userdata == {}:
userdata = {
'name': 'Дискурс',
'slug': 'discours',
'userpic': 'https://discours.io/image/logo-mini.svg'
}
author = { # a short version for public listings
'slug': userdata.get('slug', 'discours'),
'name': userdata.get('name', 'Дискурс'),
'userpic': userdata.get('userpic', '')
}
r['authors'] = [ author, ]
# body
body = prepare_body(entry)
# save mdx for prerender if published
r['body'] = body
if entry.get('published'):
content = ''
metadata = get_metadata(r)
content = frontmatter.dumps(frontmatter.Post(r['body'], **metadata))
ext = 'mdx'
parentDir = '/'.join(os.getcwd().split('/')[:-1])
filepath = parentDir + '/discoursio-web/content/' + r['slug']
# print(filepath)
bc = bytes(content,'utf-8').decode('utf-8','ignore')
open(filepath + '.' + ext, 'w').write(bc)
# open(filepath + '.html', 'w').write(body_orig)
# topics
category = entry['category']
mainTopic = topics_by_oid.get(category)
if mainTopic:
r['mainTopic'] = mainTopic["slug"]
r['mainTopic'] = storage['replacements'].get(mainTopic["slug"], mainTopic["slug"])
topic_oids = [category, ]
topic_errors = []
topic_oids.extend(entry.get('tags', []))
for oid in topic_oids:
if oid in topics_by_oid:
r['topics'].append(topics_by_oid[oid])
if oid in storage['topics']['by_oid']:
r['topics'].append(storage['topics']['by_oid'][oid]['slug'])
else:
# print('ERROR: unknown old topic id: ' + oid)
topic_errors.append(oid)
# set prepared shout data
print('[migration] unknown old topic id: ' + oid)
shout_dict = r.copy()
del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
del shout_dict['rating'] # FIXME: TypeError: 'rating' is an invalid keyword argument for Shout
del shout_dict['ratings']
entry['topics'] = r['topics']
entry['cover'] = r['cover']
entry['authors'] = r['authors']
# get author
user = None
email = userdata.get('email')
authorslug = userdata.get('slug')
with local_session() as session:
try:
if email: user = session.query(User).filter(User.email == email).first()
if not user and authorslug: user = session.query(User).filter(User.slug == authorslug).first()
if not user and userdata: user = User.create(**userdata)
except:
print('[migration] shout author error: \n%r' % entry)
raise Exception
assert user, 'could not get a user'
shout_dict['authors'] = [ user, ]
# body
r['body'] = prepare_body(entry)
# save shout to db
s = object()
try: s = Shout.create(**shout_dict)
except: print('[migration] shout create error: \n%r' % shout_dict)
shout_dict = r.copy()
user = None
del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
del shout_dict['rating'] # FIXME: TypeError: 'rating' is an invalid keyword argument for Shout
del shout_dict['ratings']
email = userdata.get('email')
slug = userdata.get('slug')
with local_session() as session:
# c = session.query(Community).all().pop()
if email: user = session.query(User).filter(User.email == email).first()
if not user and slug: user = session.query(User).filter(User.slug == slug).first()
if not user and userdata:
try: user = User.create(**userdata)
except sqlalchemy.exc.IntegrityError:
print('[migration] user error: %r' % userdata)
userdata['id'] = user.id
userdata['createdAt'] = user.createdAt
storage['users']['by_slug'][userdata['slug']] = userdata
storage['users']['by_oid'][entry['_id']] = userdata
assert user, 'could not get a user'
shout_dict['authors'] = [ user, ]
try:
s = Shout.create(**shout_dict)
except sqlalchemy.exc.IntegrityError:
with local_session() as session:
s = session.query(Shout).filter(Shout.slug == shout_dict['slug']).first()
bump = False
if s:
for key in shout_dict:
if key in s.__dict__:
if s.__dict__[key] != shout_dict[key]:
print('[migration] shout already exists, but differs in %s' % key)
bump = True
else:
print('[migration] shout already exists, but lacks %s' % key)
bump = True
if bump:
s.update(shout_dict)
else:
print('[migration] something went wrong with shout: \n%r' % shout_dict)
session.commit()
except:
print(s)
raise Exception
# shout topics aftermath
shout_dict['topics'] = []
for tpc in r['topics']:
oldslug = tpc
newslug = storage['replacements'].get(oldslug, oldslug)
if newslug:
with local_session() as session:
shout_topic_old = session.query(ShoutTopic)\
.filter(ShoutTopic.shout == s.slug)\
.filter(ShoutTopic.topic == oldslug).first()
if shout_topic_old:
shout_topic_old.update({ 'slug': newslug })
else:
shout_topic_new = session.query(ShoutTopic)\
.filter(ShoutTopic.shout == s.slug)\
.filter(ShoutTopic.topic == newslug).first()
if not shout_topic_new: ShoutTopic.create(**{ 'shout': s.slug, 'topic': newslug })
session.commit()
shout_dict['topics'].append(newslug)
else:
print('[migration] ignored topic slug: \n%r' % tpc)
# raise Exception
# shout ratings
try:
shout_dict['ratings'] = []
for shout_rating_old in entry.get('ratings',[]):
with local_session() as session:
rater = session.query(User).filter(User.old_id == shout_rating_old['createdBy']).first()
rater = session.query(User).filter(User.oid == shout_rating_old['createdBy']).first()
if rater:
shout_rating_dict = {
'value': shout_rating_old['value'],
@@ -210,43 +222,10 @@ def migrate(entry, users_by_oid, topics_by_oid):
print('[migration] shout rating error: \n%r' % shout_rating_old)
# raise Exception
# shout topics
try:
shout_dict['topics'] = []
for topic in r['topics']:
tpc = topics_by_oid[topic['oid']]
oldslug = tpc['slug']
newslug = retopics.get(oldslug, oldslug)
need_create_topic = False
if newslug:
with local_session() as session:
shout_topic_new = session.query(ShoutTopic)\
.filter(ShoutTopic.shout == s.slug)\
.filter(ShoutTopic.topic == newslug).first()
shout_topic_old = session.query(ShoutTopic)\
.filter(ShoutTopic.shout == s.slug)\
.filter(ShoutTopic.topic == oldslug).first()
if not shout_topic_new:
if shout_topic_old:
shout_topic_old.update({ 'slug': newslug })
else:
need_create_topic = True
if need_create_topic:
ShoutTopic.create(**{ 'shout': s.slug, 'topic': newslug })
shout_dict['topics'].append(newslug)
except:
print('[migration] shout topic error: \n%r' % topic)
raise Exception
# shout views
try:
views = entry.get('views', 1)
ShoutViewByDay.create(
shout = s.slug,
value = views
)
except:
print('[migration] shout view error: \n%r' % entry)
# raise Exception
shout_dict['old_id'] = entry.get('_id')
return shout_dict, topic_errors
ShoutViewByDay.create( shout = s.slug, value = entry.get('views', 1) )
del shout_dict['ratings']
shout_dict['oid'] = entry.get('_id')
storage['shouts']['by_oid'][entry['_id']] = shout_dict
storage['shouts']['by_slug'][shout_dict['slug']] = shout_dict
return shout_dict
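In outline, every migrated shout is now indexed twice in the shared storage, which is what comments_handle relies on to resolve contentItem oids:

    shout_dict = migrate(entry, storage)
    # the same dict is then reachable both ways:
    #   storage['shouts']['by_oid'][entry['_id']]
    #   storage['shouts']['by_slug'][shout_dict['slug']]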

View File

@@ -1,46 +0,0 @@
import json
from datetime import datetime
from orm.base import local_session
from orm import Topic, Community
from dateutil.parser import parse as date_parse
def migrate(entry, topics_by_oid):
'''
type Topic {
slug: String! # ID
createdBy: Int! # User
createdAt: DateTime!
title: String
parents: [String] # NOTE: topic can have parent topics
children: [String] # and children
}
'''
if type(entry['createdAt']) == type(''):
ts = date_parse(entry['createdAt'])
else:
ts = datetime.fromtimestamp(entry['createdAt']/1000)
topic_dict = {
'slug': entry['slug'],
'oid': entry['_id'],
# 'createdBy': entry['createdBy'],
# 'createdAt': ts,
'title': entry['title'].replace('&nbsp;', ' '), # .lower(),
'children': [],
'community' : Community.default_community.slug,
'body' : entry.get('description','').replace('&nbsp;', ' ')
}
try:
retopics = json.loads(open('migration/tables/replacements.json').read())
with local_session() as session:
slug = topics_by_oid.get(topic_dict['oid'], topic_dict)['slug']
slug = retopics.get(slug, slug)
if slug:
topic = session.query(Topic).filter(Topic.slug == slug).first()
if not topic:
del topic_dict['oid']
topic = Topic.create(**topic_dict)
except Exception as e:
# print(e)
raise e
topic_dict['oid'] = entry['_id']
return topic_dict

View File

@@ -0,0 +1,28 @@
from migration.extract import extract, html2text
from orm.base import local_session
from orm import Topic, Community
def migrate(entry):
body_orig = entry.get('description', '').replace('&nbsp;', ' ')
topic_dict = {
'slug': entry['slug'],
'oid': entry['_id'],
'title': entry['title'].replace('&nbsp;', ' '), #.lower(),
'children': [],
'community' : Community.default_community.slug
}
topic_dict['body'] = extract(html2text(body_orig), entry['_id'])
with local_session() as session:
slug = topic_dict['slug']
topic = session.query(Topic).filter(Topic.slug == slug).first()
if not topic:
topic = Topic.create(**topic_dict)
if len(topic.title) > len(topic_dict['title']):
topic.update({ 'title': topic_dict['title'] })
if len(topic.body) < len(topic_dict['body']):
topic.update({ 'body': topic_dict['body'] })
session.commit()
# print(topic.__dict__)
rt = topic.__dict__.copy()
del rt['_sa_instance_state']
return rt
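A usage sketch for the merged topic migrator; the entry below is hypothetical but uses only the fields migrate() reads:

    entry = {
        '_id': '2tGldRkfJPqq4',                     # hypothetical mongo oid
        'slug': 'kultura',
        'title': 'Культура',
        'description': '<p>Тексты о культуре</p>'
    }
    topic = migrate(entry)
    # the body is built with extract(html2text(description), oid); if a Topic
    # row with the same slug already exists, the shorter title and the longer
    # body win on update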

View File

@@ -1,124 +1,114 @@
import sqlalchemy
from orm import User, Role, UserRating
from orm import User, UserRating
from orm.user import EmailSubscription
import frontmatter
from dateutil.parser import parse
from migration.html2text import html2text
from orm.base import local_session
def migrate(entry):
'''
type User {
username: String! # email
createdAt: DateTime!
email: String
password: String
oauth: String # provider:token
name: String # to display
userpic: String
links: [String]
emailConfirmed: Boolean # should contain all emails too
id: Int!
muted: Boolean
roles: [Role]
updatedAt: DateTime
wasOnlineAt: DateTime
ratings: [Rating]
slug: String
bio: String
notifications: [Int]
if 'subscribedTo' in entry: del entry['subscribedTo']
email = entry['emails'][0]['address']
user_dict = {
'oid': entry['_id'],
'roles': [],
'ratings': [],
'username': email,
'email': email,
'password': entry['services']['password'].get('bcrypt', ''),
'createdAt': parse(entry['createdAt']),
'emailConfirmed': bool(entry['emails'][0]['verified']),
'muted': False, # amnesty
'bio': entry['profile'].get('bio', ''),
'notifications': [],
'createdAt': parse(entry['createdAt']),
'roles': [], # entry['roles'] # roles by community
'ratings': [], # entry['ratings']
'links': [],
'name': 'anonymous'
}
'''
res = {}
res['old_id'] = entry['_id']
res['password'] = entry['services']['password'].get('bcrypt', '')
del entry['services']
if 'subscribedTo' in entry: #TODO: use subscribedTo
del entry['subscribedTo']
res['username'] = entry['emails'][0]['address']
res['email'] = res['username']
res['wasOnlineAt'] = parse(entry.get('loggedInAt', entry['createdAt']))
res['emailConfirmed'] = entry['emails'][0]['verified']
res['createdAt'] = parse(entry['createdAt'])
res['roles'] = [] # entry['roles'] # roles by community
res['ratings'] = [] # entry['ratings']
res['notifications'] = []
res['links'] = []
res['muted'] = False
res['name'] = 'anonymous'
if 'updatedAt' in entry: user_dict['updatedAt'] = parse(entry['updatedAt'])
if 'wasOnlineAt' in entry: user_dict['wasOnlineAt'] = parse(entry['wasOnlineAt'])
if entry.get('profile'):
# slug
res['slug'] = entry['profile'].get('path')
res['bio'] = entry['profile'].get('bio','')
user_dict['slug'] = entry['profile'].get('path')
user_dict['bio'] = entry['profile'].get('bio','')
# userpic
try: res['userpic'] = 'https://assets.discours.io/unsafe/100x/' + entry['profile']['thumborId']
try: user_dict['userpic'] = 'https://assets.discours.io/unsafe/100x/' + entry['profile']['thumborId']
except KeyError:
try: res['userpic'] = entry['profile']['image']['url']
except KeyError: res['userpic'] = ''
try: user_dict['userpic'] = entry['profile']['image']['url']
except KeyError: user_dict['userpic'] = ''
# name
fn = entry['profile'].get('firstName', '')
ln = entry['profile'].get('lastName', '')
name = res['slug'] if res['slug'] else 'anonymous'
name = user_dict['slug'] if user_dict['slug'] else 'anonymous'
name = fn if fn else name
name = (name + ' ' + ln) if ln else name
name = entry['profile']['path'].lower().replace(' ', '-') if len(name) < 2 else name
res['name'] = name
user_dict['name'] = name
# links
fb = entry['profile'].get('facebook', False)
if fb:
res['links'].append(fb)
if fb: user_dict['links'].append(fb)
vk = entry['profile'].get('vkontakte', False)
if vk:
res['links'].append(vk)
if vk: user_dict['links'].append(vk)
tr = entry['profile'].get('twitter', False)
if tr:
res['links'].append(tr)
if tr: user_dict['links'].append(tr)
ws = entry['profile'].get('website', False)
if ws:
res['links'].append(ws)
if ws: user_dict['links'].append(ws)
# some checks
if not res['slug'] and len(res['links']) > 0: res['slug'] = res['links'][0].split('/')[-1]
if not user_dict['slug'] and len(user_dict['links']) > 0:
user_dict['slug'] = user_dict['links'][0].split('/')[-1]
res['slug'] = res.get('slug', res['email'].split('@')[0])
old = res['old_id']
user = User.create(**res.copy())
res['id'] = user.id
return res
user_dict['slug'] = user_dict.get('slug') or user_dict['email'].split('@')[0]
oid = user_dict['oid']
try: user = User.create(**user_dict.copy())
except sqlalchemy.exc.IntegrityError:
print('[migration] cannot create user ' + user_dict['slug'])
with local_session() as session:
old_user = session.query(User).filter(User.slug == user_dict['slug']).first()
old_user.oid = oid
user = old_user
if not user:
print('[migration] ERROR: cannot find user ' + user_dict['slug'])
raise Exception
user_dict['id'] = user.id
return user_dict
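A minimal sketch of the create-or-merge step above, pulled out as a standalone function: it assumes a SQLAlchemy User model with a unique slug column plus an oid column and a caller-provided session; get_or_create_user is an illustrative name, not part of this repo.

from sqlalchemy.exc import IntegrityError

def get_or_create_user(session, User, user_dict):
    user = User(**user_dict)
    session.add(user)
    try:
        session.commit()
    except IntegrityError:
        # slug already taken: reuse the existing row and attach the legacy oid
        session.rollback()
        user = session.query(User).filter(User.slug == user_dict['slug']).first()
        user.oid = user_dict['oid']
        session.commit()
    return user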
def migrate_email_subscription(entry):
res = {}
res["email"] = entry["email"]
res["createdAt"] = parse(entry["createdAt"])
subscription = EmailSubscription.create(**res)
EmailSubscription.create(**res)
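Presumably this handler is driven from the main migration loop like the others; a hedged usage sketch (the 'email_subscriptions' storage key is an assumption):

for entry in storage['email_subscriptions']['data']:
    # each entry carries at least 'email' and 'createdAt'
    migrate_email_subscription(entry)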
def migrate_2stage(entry, id_map):
ce = 0
for rating_entry in entry.get('ratings', []):
rater_old_id = rating_entry['createdBy']
rater_slug = id_map.get(rater_old_id)
rater_oid = rating_entry['createdBy']
rater_slug = id_map.get(rater_oid)
if not rater_slug:
ce += 1
# print(rating_entry)
continue
old_id = entry['_id']
oid = entry['_id']
author_slug = id_map.get(oid)
user_rating_dict = {
'value': rating_entry['value'],
'rater': rater_slug,
'user': id_map.get(old_id)
'user': author_slug
}
with local_session() as session:
try:
user_rating = UserRating.create(**user_rating_dict)
except sqlalchemy.exc.IntegrityError:
print('[migration] duplicate rating solving for ' + rater_slug)
old_rating = session.query(UserRating).filter(UserRating.rater == rater_slug, UserRating.user == author_slug).first()
old_rating.value = rating_entry['value'] + old_rating.value
print('[migration] cannot create ' + author_slug + '`s rate from ' + rater_slug)
print('[migration] concat rating value %d+%d=%d' % (old_rating.value, rating_entry['value'], old_rating.value + rating_entry['value']))
old_rating.update({ 'value': old_rating.value + rating_entry['value'] })
session.commit()
except Exception as e:
print(e)
return ce
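The duplicate-rating branch above is an accumulate-on-conflict upsert; a standalone sketch, assuming UserRating enforces one row per (rater, user) pair and stores an integer value (the helper name is illustrative):

from sqlalchemy.exc import IntegrityError

def upsert_rating(session, UserRating, rater_slug, user_slug, value):
    session.add(UserRating(rater=rater_slug, user=user_slug, value=value))
    try:
        session.commit()
    except IntegrityError:
        # one rating per (rater, user): add the new value to the existing one
        session.rollback()
        existing = session.query(UserRating).filter(
            UserRating.rater == rater_slug,
            UserRating.user == user_slug).first()
        existing.value += value
        session.commit()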

View File

@@ -17,16 +17,14 @@ class CommentRating(Base):
class Comment(Base):
__tablename__ = 'comment'
author: int = Column(ForeignKey("user.id"), nullable=False, comment="Sender")
body: str = Column(String, nullable=False, comment="Comment Body")
createdAt = Column(DateTime, nullable=False, default = datetime.now, comment="Created at")
createdBy: str = Column(ForeignKey("user.slug"), nullable=False, comment="Sender")
updatedAt = Column(DateTime, nullable=True, comment="Updated at")
updatedBy = Column(ForeignKey("user.id"), nullable=True, comment="Last Editor")
updatedBy = Column(ForeignKey("user.slug"), nullable=True, comment="Last Editor")
deletedAt = Column(DateTime, nullable=True, comment="Deleted at")
deletedBy = Column(ForeignKey("user.id"), nullable=True, comment="Deleted by")
deletedBy = Column(ForeignKey("user.slug"), nullable=True, comment="Deleted by")
shout = Column(ForeignKey("shout.slug"), nullable=False)
replyTo: int = Column(ForeignKey("comment.id"), nullable=True, comment="comment ID")
ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id)
# TODO: work in progress, update this code
oid: str = Column(String, nullable=True)
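With createdBy now referencing user.slug instead of user.id, author lookups filter on the slug directly; a minimal sketch (the helper is illustrative, not in this commit):

def comments_by_author(session, Comment, author_slug):
    # createdBy is a ForeignKey("user.slug"), so compare against the slug
    return session.query(Comment).filter(Comment.createdBy == author_slug).all()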

View File

@@ -16,11 +16,11 @@ class Community(Base):
__tablename__ = 'community'
name: str = Column(String, nullable=False, comment="Name")
slug: str = Column(String, unique = True, nullable = False)
slug: str = Column(String, nullable = False)
desc: str = Column(String, nullable=False, default='')
pic: str = Column(String, nullable=False, default='')
createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at")
createdBy: str = Column(ForeignKey("user.id"), nullable=False, comment="Creator")
createdBy: str = Column(ForeignKey("user.slug"), nullable=False, comment="Creator")
@staticmethod
def init_table():

View File

@@ -328,14 +328,14 @@ class Shout(Base):
id = None
slug: str = Column(String, primary_key=True)
community: int = Column(Integer, ForeignKey("community.id"), nullable=False, comment="Community")
community: str = Column(Integer, ForeignKey("community.id"), nullable=False, comment="Community")
body: str = Column(String, nullable=False, comment="Body")
createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Created at")
updatedAt: str = Column(DateTime, nullable=True, comment="Updated at")
replyTo: int = Column(ForeignKey("shout.slug"), nullable=True)
versionOf: int = Column(ForeignKey("shout.slug"), nullable=True)
tags: str = Column(String, nullable=True)
publishedBy: bool = Column(ForeignKey("user.id"), nullable=True)
publishedBy: int = Column(ForeignKey("user.id"), nullable=True)
publishedAt: str = Column(DateTime, nullable=True)
cover: str = Column(String, nullable = True)
title: str = Column(String, nullable = True)
@@ -346,6 +346,8 @@ class Shout(Base):
topics = relationship(lambda: Topic, secondary=ShoutTopic.__tablename__)
mainTopic = Column(ForeignKey("topic.slug"), nullable=True)
visibleFor = relationship(lambda: User, secondary=ShoutViewer.__tablename__)
draft: bool = Column(Boolean, default=True)
oid: str = Column(String, nullable=True)
@property
async def stat(self):
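The new draft flag defaults to True until publication, so unpublished shouts can be listed with one filter; a hedged sketch (helper name is illustrative):

def list_drafts(session, Shout):
    # draft stays True until the shout is published
    return session.query(Shout).filter(Shout.draft.is_(True)).all()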

View File

@@ -24,6 +24,7 @@ class Topic(Base):
pic: str = Column(String, nullable=True, comment="Picture")
children = Column(JSONType, nullable=True, default = [], comment="list of children topics")
community = Column(ForeignKey("community.slug"), nullable=False, comment="Community")
oid: str = Column(String, nullable=True, comment="Old ID")
class TopicStorage:
topics = {}

View File

@@ -68,7 +68,7 @@ class User(Base):
notifications = relationship(lambda: UserNotifications)
ratings = relationship(UserRating, foreign_keys=UserRating.user)
roles = relationship(lambda: Role, secondary=UserRole.__tablename__)
old_id: str = Column(String, nullable = True)
oid: str = Column(String, nullable = True)
@staticmethod
def init_table():
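With old_id renamed to oid, legacy Mongo ids stay queryable after migration; a small lookup sketch (illustrative helper, not part of this commit):

def user_by_oid(session, User, oid):
    # oid keeps the legacy mongo _id for cross-referencing migrated rows
    return session.query(User).filter(User.oid == oid).first()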

View File

@@ -1,6 +1,7 @@
from orm import Comment, CommentRating
from orm.base import local_session
from orm.shout import ShoutCommentsSubscription
from orm.user import User
from resolvers.base import mutation, query, subscription
from auth.authenticate import login_required
import asyncio
@@ -68,7 +69,7 @@ async def update_comment(_, info, id, body):
comment = session.query(Comment).filter(Comment.id == id).first()
if not comment:
return {"error": "invalid comment id"}
if comment.author != user_id:
if comment.createdBy != user_id:
return {"error": "access denied"}
comment.body = body
@@ -88,7 +89,7 @@ async def delete_comment(_, info, id):
comment = session.query(Comment).filter(Comment.id == id).first()
if not comment:
return {"error": "invalid comment id"}
if comment.author != user_id:
if comment.createdBy != user_id:
return {"error": "access denied"}
comment.deletedAt = datetime.now()
@@ -103,12 +104,12 @@ async def rate_comment(_, info, id, value):
user_id = auth.user_id
with local_session() as session:
user = session.query(User).filter(User.id == user_id).first()
comment = session.query(Comment).filter(Comment.id == id).first()
if not comment:
return {"error": "invalid comment id"}
rating = session.query(CommentRating).\
filter(CommentRating.comment_id == id, CommentRating.createdBy == user_id).first()
filter(CommentRating.comment_id == id, CommentRating.createdBy == user.slug).first()
if rating:
rating.value = value
session.commit()
@@ -124,7 +125,8 @@ async def rate_comment(_, info, id, value):
def get_subscribed_shout_comments(slug):
with local_session() as session:
rows = session.query(ShoutCommentsSubscription.shout).\
filter(ShoutCommentsSubscription.subscriber == slug, ShoutCommentsSubscription.deletedAt == None).\
filter(ShoutCommentsSubscription.subscriber == slug,\
ShoutCommentsSubscription.deletedAt == None).\
all()
slugs = [row.shout for row in rows]
return slugs

View File

@@ -91,7 +91,7 @@ async def user_comments(_, info, slug, page, size):
page = page - 1
with local_session() as session:
comments = session.query(Comment).\
filter(Comment.author == user.id).\
filter(Comment.createdBy == user.id).\
order_by(desc(Comment.createdAt)).\
limit(size).\
offset(page * size)
@@ -198,7 +198,7 @@ async def shouts_reviewed(_, info, page, size):
where(and_(Shout.publishedAt != None, ShoutRating.rater == user.slug))
shouts_by_comment = session.query(Shout).\
join(Comment).\
where(and_(Shout.publishedAt != None, Comment.author == user.id))
where(and_(Shout.publishedAt != None, Comment.createdBy == user.id))
shouts = shouts_by_rating.union(shouts_by_comment).\
order_by(desc(Shout.publishedAt)).\
limit(size).\
@@ -215,7 +215,7 @@ async def shouts_commented_by_user(_, info, slug, page, size):
with local_session() as session:
shouts = session.query(Shout).\
join(Comment).\
where(Comment.author == user.id).\
where(Comment.createdBy == user.id).\
order_by(desc(Comment.createdAt)).\
limit(size).\
offset( (page - 1) * size)

View File

@@ -272,7 +272,7 @@ async def get_shout_comments(_, info, slug):
filter(Comment.shout == slug).\
group_by(Comment.id).all()
for comment in comments:
comment.author = await UserStorage.get_user(comment.author)
comment.createdBy = await UserStorage.get_user(comment.createdBy)
return comments
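The loop above hydrates each comment by swapping the stored author slug for a full user object; as a standalone pattern it is roughly (assuming any async slug-to-user getter such as the UserStorage cache):

async def hydrate_authors(comments, get_user):
    for comment in comments:
        # replace the slug stored in createdBy with the resolved user object
        comment.createdBy = await get_user(comment.createdBy)
    return comments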
@query.field("shoutsByTopics")

View File

@@ -282,26 +282,24 @@ type User {
bio: String
notifications: [Int]
communities: [Int] # user participating communities
old_id: String
oid: String
}
type Comment {
id: Int!
shout: Int!
author: User!
createdBy: User!
body: String!
replyTo: Int
replyTo: Comment
createdAt: DateTime!
updatedAt: DateTime
updatedBy: Int
shout: Shout!
deletedAt: DateTime
deletedBy: Int
deletedBy: User
ratings: [CommentRating]
views: Int
old_id: String
oid: String
old_thread: String
}
type CommentRating {
id: Int!
comment_id: Int!
@@ -318,22 +316,21 @@ type Shout {
createdAt: DateTime!
authors: [User!]!
ratings: [Rating]
visibleFor: [User]
community: String
cover: String
layout: String
# replyTo: Shout
versionOf: Shout
tags: [String] # actual values
draft: Boolean
versionOf: Shout # translations and adaptations
visibleFor: [User]
topics: [Topic]
mainTopic: String
title: String
subtitle: String
updatedAt: DateTime
updatedBy: Int # can be user id?
updatedBy: User
deletedAt: DateTime
deletedBy: Int
publishedBy: Int # if there is no published field - it is not published
deletedBy: User
publishedBy: User
publishedAt: DateTime
stat: ShoutStat
}
@@ -369,6 +366,7 @@ type Topic {
children: [String] # and children
community: String!
topicStat: TopicStat
oid: String
}
enum ProposalStatus {