migration-is-back

2022-08-11 12:14:12 +03:00 · 2022-08-11 12:14:12 +03:00 · 65532ea1a3
commit 65532ea1a3
parent 83f5f280b2
20 changed files with 3848 additions and 0 deletions
--- a/migrate.py
+++ b/migrate.py
@@ -0,0 +1,303 @@
 ''' cmd managed migration '''
 from datetime import datetime
 import json
 import subprocess
 import sys
 import os
 # from migration.export import export_email_subscriptions
 from migration.export import export_mdx, export_slug
 from migration.tables.users import migrate as migrateUser
 from migration.tables.users import migrate_2stage as migrateUser_2stage
 from migration.tables.content_items import get_shout_slug, migrate as migrateShout
 from migration.tables.topics import migrate as migrateTopic
 from migration.tables.comments import migrate as migrateComment
 from migration.tables.comments import migrate_2stage as migrateComment_2stage
 from orm.reaction import Reaction
 from settings import DB_URL
 # date stamp (YYYYMMDD) taken at import time; create_pgdump uses it as the dump file prefix
 TODAY = datetime.now().strftime('%Y%m%d')
 # fixed timestamp string; not referenced by the code visible in this module
 OLD_DATE = '2016-03-05 22:22:00.350000'
 def users_handle(storage):
 	''' Migrate every user entry, then run the second stage over the same entries.

 	Fills storage['users']['by_oid'] and storage['users']['by_slug'] and
 	returns the very same storage object.
 	'''
 	users = storage['users']
 	print('[migration] migrating %d users' % (len(users['data'])))
 	stripped_fields = ('password', 'notifications', 'emailConfirmed', 'username', 'email')
 	id_map = {}
 	for entry in users['data']:
 		oid = entry['_id']
 		user = migrateUser(entry)
 		# NOTE(review): by_oid and by_slug receive the same object, so the
 		# fields deleted below disappear from the by_oid record as well
 		users['by_oid'][oid] = user
 		for field in stripped_fields:
 			del user[field]
 		users['by_slug'][user['slug']] = user
 		id_map[user['oid']] = user['slug']
 	# second stage gets the oid -> slug map built above
 	ce = 0
 	for entry in users['data']:
 		ce += migrateUser_2stage(entry, id_map)
 	return storage
 def topics_handle(storage):
 	''' Build topics from tags and categories whose slug is listed in the replacements map.

 	Entries with an unlisted slug are reported and skipped. Returns storage.
 	'''
 	topics = storage['topics']
 	migrated = 0
 	for raw in topics['tags'] + topics['cats']:
 		if raw['slug'] not in storage['replacements']:
 			print('[migration] topic ' + raw['slug'] + ' ignored')
 			continue
 		# the entry itself is renamed before it is migrated
 		raw['slug'] = storage['replacements'][raw['slug']]
 		topic = migrateTopic(raw)
 		topics['by_oid'][raw['_id']] = topic
 		topics['by_slug'][raw['slug']] = topic
 		migrated += 1
 	# drop the entries still registered under a replaced slug and point
 	# their oid at whatever is stored under the new slug
 	for oldslug, newslug in storage['replacements'].items():
 		if oldslug == newslug or oldslug not in topics['by_slug']:
 			continue
 		oid = topics['by_slug'][oldslug]['_id']
 		del topics['by_slug'][oldslug]
 		topics['by_oid'][oid] = topics['by_slug'][newslug]
 	print('[migration] ' + str(migrated) + ' topics migrated')
 	print('[migration] ' + str(len(topics['by_oid'])) + ' topics by oid')
 	print('[migration] ' + str(len(topics['by_slug'])) + ' topics by slug')
 	return storage
 def shouts_handle(storage, args):
 	''' Migrate content items one by one.

 	storage -- the dict built by data_load; storage['shouts']['by_oid'] and
 	           storage['shouts']['by_slug'] are filled here
 	args    -- sequence of command-line words: '-' limits the run to the
 	           slugs listed in args, 'mdx' exports published items as mdx

 	Returns the same storage object.
 	'''
 	counter = 0
 	discours_author = 0
 	pub_counter = 0
 	for entry in storage['shouts']['data']:
 		slug = get_shout_slug(entry)
 		# single slug mode
 		if '-' in args and slug not in args:
 			continue
 		shout = migrateShout(entry, storage)
 		storage['shouts']['by_oid'][entry['_id']] = shout
 		storage['shouts']['by_slug'][shout['slug']] = shout
 		if not shout['topics']:
 			print('[migration] no topics!')
 		# the first author is the one reported in the progress line
 		author = shout['authors'][0].slug
 		if author == 'discours':
 			discours_author += 1
 		if entry.get('published'):
 			if 'mdx' in args:
 				export_mdx(shout)
 			pub_counter += 1
 		counter += 1
 		# counter is already 1-based at this point; the former "counter+1"
 		# numbered the first item as 2
 		print(str(counter) + ': ' + shout['slug'] + " @" + author)
 	print('[migration] ' + str(counter) + ' content items were migrated')
 	print('[migration] ' + str(pub_counter) + ' have been published')
 	print('[migration] ' + str(discours_author) + ' authored by @discours')
 	return storage
 def comments_handle(storage):
 	id_map = {}
 	ignored_counter = 0
 	missed_shouts = {}
 	for oldcomment in storage['reactions']['data']:
 		if not oldcomment.get('deleted'):
 			reaction = migrateComment(oldcomment, storage)
 			if type(reaction) == str:
 				missed_shouts[reaction] = oldcomment
 			elif type(reaction) == Reaction:
 				reaction = reaction.dict()
 				id = reaction['id']
 				oid = reaction['oid']
 				id_map[oid] = id
 			else:
 				ignored_counter += 1
 	for reaction in storage['reactions']['data']: migrateComment_2stage(
 		reaction, id_map)
 	print('[migration] ' + str(len(id_map)) + ' comments migrated')
 	print('[migration] ' + str(ignored_counter) + ' comments ignored')
 	print('[migration] ' + str(len(missed_shouts.keys())) +
 		  ' commented shouts missed')
 	missed_counter = 0
 	for missed in missed_shouts.values():
 		missed_counter += len(missed)
 	print('[migration] ' + str(missed_counter) + ' comments dropped')
 	return storage
 def bson_handle():
 	''' Decode the bson dump into json tables by delegating to migration.bson2json. '''
 	# imported on demand, only when this command actually runs
 	from migration.bson2json import json_tables
 	json_tables()
 def export_one(slug, storage):
 	''' Migrate topics, users and the single shout named by slug, then export it.

 	slug    -- slug of the content item to export
 	storage -- the dict built by data_load
 	'''
 	topics_handle(storage)
 	users_handle(storage)
 	# shouts_handle requires its args parameter; '-' plus the slug puts it
 	# into single slug mode so only the requested item is migrated
 	shouts_handle(storage, ['-', slug])
 	export_slug(slug, storage)
 def all_handle(storage, args):
 	''' Run the whole pipeline in order: users, topics, shouts, comments. '''
 	print('[migration] handle everything')
 	for stage in (users_handle, topics_handle):
 		stage(storage)
 	# only the shouts stage looks at the command-line words
 	shouts_handle(storage, args)
 	comments_handle(storage)
 	print('[migration] done!')
 def _read_json(path):
 	''' Parse the json file at path; the file handle is closed before returning. '''
 	with open(path) as f:
 		return json.load(f)
 def data_load():
 	''' Load the json tables from migration/data and index them in memory.

 	Returns the storage dict shared by all *_handle functions, with the
 	sections 'content_items', 'shouts', 'reactions', 'topics', 'users'
 	and 'replacements'. Paths are relative to the current working
 	directory. Errors from reading or parsing a file propagate unchanged.
 	'''
 	storage = {
 		'content_items': {
 			'by_oid': {},
 			'by_slug': {},
 		},
 		'shouts': {
 			'by_oid': {},
 			'by_slug': {},
 			'data': []
 		},
 		'reactions': {
 			'by_oid': {},
 			'by_slug': {},
 			'by_content': {},
 			'data': []
 		},
 		'topics': {
 			'by_oid': {},
 			'by_slug': {},
 			'cats': [],
 			'tags': [],
 		},
 		'users': {
 			'by_oid': {},
 			'by_slug': {},
 			'data': []
 		},
 		'replacements': _read_json('migration/tables/replacements.json')
 	}
 	# load the raw tables
 	users_data = _read_json('migration/data/users.json')
 	storage['users']['data'] = users_data
 	print('[migration] ' + str(len(users_data)) + ' users ')
 	tags_data = _read_json('migration/data/tags.json')
 	storage['topics']['tags'] = tags_data
 	print('[migration] ' + str(len(tags_data)) + ' tags ')
 	cats_data = _read_json('migration/data/content_item_categories.json')
 	storage['topics']['cats'] = cats_data
 	print('[migration] ' + str(len(cats_data)) + ' cats ')
 	comments_data = _read_json('migration/data/comments.json')
 	storage['reactions']['data'] = comments_data
 	print('[migration] ' + str(len(comments_data)) + ' comments ')
 	content_data = _read_json('migration/data/content_items.json')
 	storage['shouts']['data'] = content_data
 	print('[migration] ' + str(len(content_data)) + ' content items ')
 	# fill out the lookup tables; users have no slug yet, so only by_oid
 	for x in users_data:
 		storage['users']['by_oid'][x['_id']] = x
 	print('[migration] ' + str(len(storage['users']['by_oid'])) + ' users by oid')
 	for table in (tags_data, cats_data):
 		for x in table:
 			storage['topics']['by_oid'][x['_id']] = x
 			storage['topics']['by_slug'][x['slug']] = x
 	print('[migration] ' + str(len(storage['topics']['by_slug'])) + ' topics by slug')
 	for item in content_data:
 		slug = get_shout_slug(item)
 		storage['content_items']['by_slug'][slug] = item
 		storage['content_items']['by_oid'][item['_id']] = item
 	print('[migration] ' + str(len(content_data)) + ' content items')
 	for x in comments_data:
 		storage['reactions']['by_oid'][x['_id']] = x
 		cid = x['contentItem']
 		# one comment per content item is kept here: a later one overwrites an earlier one
 		storage['reactions']['by_content'][cid] = x
 		ci = storage['content_items']['by_oid'].get(cid, {})
 		if 'slug' in ci:
 			storage['reactions']['by_slug'][ci['slug']] = x
 	print('[migration] ' + str(len(storage['reactions']['by_content'])) + ' with comments')
 	return storage
 def mongo_download(url):
 	''' Run mongodump against the database at url.

 	Raises Exception when url is empty or None.
 	'''
 	if not url:
 		raise Exception('\n\nYou should set MONGODB_URL enviroment variable\n')
 	print('[migration] mongodump ' + url)
 	command = ['mongodump', '--uri', url + '/?authSource=admin', '--forceTableScan']
 	# stderr of the tool is merged into stdout
 	subprocess.call(command, stderr=subprocess.STDOUT)
 def create_pgdump():
 	''' Dump the database at DB_URL with pg_dump and copy the file away with scp.

 	Raises Exception when DB_URL is empty.
 	'''
 	if not DB_URL:
 		raise Exception('\n\nYou should set DATABASE_URL enviroment variable\n')
 	dump_file = TODAY + '-pgdump.sql'
 	subprocess.call(['pg_dump', DB_URL, '-f', dump_file], stderr=subprocess.STDOUT)
 	subprocess.call(['scp', dump_file, 'root@build.discours.io:/root/.'])
 def handle_auto():
 	''' Unattended run: mongodump, bson decoding, full migration, postgres dump. '''
 	print('[migration] no command given, auto mode')
 	mongo_url = os.getenv('MONGODB_URL')
 	mongo_download(mongo_url)
 	bson_handle()
 	storage = data_load()
 	all_handle(storage, sys.argv)
 	create_pgdump()
 def _print_usage():
 	''' Print the command-line help. '''
 	print('[migration] usage: python migrate.py <command>')
 	print('[migration] commands: mongodb, bson, all, all mdx, - <slug>')
 def migrate():
 	''' Command-line entry point, dispatching on sys.argv[1].

 	mongodb <url> -- dump the MongoDB database
 	bson          -- decode the dump into json tables
 	- <slug>      -- migrate and export a single content item
 	anything else -- load the data and migrate everything
 	no command    -- auto mode (see handle_auto)

 	"mongodb" and "-" need an operand; when it is missing the usage text
 	is printed instead of failing with an IndexError.
 	'''
 	if not sys.argv:
 		_print_usage()
 		return
 	if len(sys.argv) == 1:
 		handle_auto()
 		return
 	cmd = sys.argv[1]
 	print('[migration] command: ' + cmd)
 	if cmd in ('mongodb', '-') and len(sys.argv) < 3:
 		# checked before data_load so a typo does not cost a full load
 		_print_usage()
 		return
 	if cmd == 'mongodb':
 		mongo_download(sys.argv[2])
 	elif cmd == 'bson':
 		bson_handle()
 	else:
 		storage = data_load()
 		if cmd == '-':
 			export_one(sys.argv[2], storage)
 		else:
 			all_handle(storage, sys.argv)
 # run the command-line entry point only when the file is executed as a script
 if __name__ == '__main__':
 	migrate()
--- a/migration/__init__.py
+++ b/migration/__init__.py
@@ -0,0 +1 @@
 # names exported by a star-import of this module
 # NOTE(review): presumably this file is the package's __init__ module -- confirm
 __all__ = ["tables", "bson2json", "html2md"]
--- a/migration/bson2json.py
+++ b/migration/bson2json.py
@@ -0,0 +1,28 @@
 import os
 import bson
 import json
 from migration.utils import DateTimeEncoder
 def json_tables():
 	''' Convert every dumped bson table into a json file.

 	Reads dump/discours/<table>.bson (relative to the working directory)
 	and writes <cwd>/migration/data/<table>.json. Both files are closed as
 	soon as they have been read or written.
 	'''
 	print('[migration] unpack dump/discours/*.bson to migration/data/*.json')
 	tables = (
 		"content_items",
 		"content_item_categories",
 		"tags",
 		"email_subscriptions",
 		"users",
 		"comments",
 	)
 	for table in tables:
 		with open('dump/discours/'+table+'.bson', 'rb') as f:
 			bs = f.read()
 		# decode_document returns the next offset together with the document
 		lc = []
 		base = 0
 		while base < len(bs):
 			base, d = bson.decode_document(bs, base)
 			lc.append(d)
 		with open(os.getcwd() + '/migration/data/'+table+'.json', 'w') as out:
 			out.write(json.dumps(lc,cls=DateTimeEncoder))
--- a/migration/export.py
+++ b/migration/export.py
@@ -0,0 +1,105 @@
 from datetime import datetime
 import json
 import os
 import frontmatter
 from migration.extract import extract_html, prepare_html_body
 from migration.utils import DateTimeEncoder
 # fixed timestamp string; not referenced by the code visible in this module
 OLD_DATE = '2016-03-05 22:22:00.350000'
 # directory prefix of the json files read by export_shouts and written by export_json
 EXPORT_DEST = '../discoursio-web/data/'
 # the current working directory without its last path component (computed at import time)
 parentDir = '/'.join(os.getcwd().split('/')[:-1])
 # directory prefix of the .mdx and .html files written by export_mdx and export_body
 contentDir = parentDir + '/discoursio-web/content/'
 # import-time timestamp; get_metadata uses it when an item has no createdAt
 ts = datetime.now()
 def get_metadata(r):
 	''' Collect the metadata of shout r: title, authors, createdAt, layout, topics, cover.

 	Authors are reduced to slug, name and userpic, with defaults for empty
 	values. Braces in the title become parentheses, topics are sorted and
 	'cover' is only present when r has a truthy cover.
 	'''
 	# a short version for public listings
 	authors = [
 		{
 			'slug': a.slug or 'discours',
 			'name': a.name or 'Дискурс',
 			'userpic': a.userpic or 'https://discours.io/static/img/discours.png'
 		}
 		for a in r['authors']
 	]
 	metadata = {
 		'title': r.get('title', '').replace('{', '(').replace('}', ')'),
 		'authors': authors,
 		'createdAt': r.get('createdAt', ts),
 		'layout': r['layout'],
 		'topics': sorted(r['topics']),
 	}
 	if r.get('cover', False):
 		metadata['cover'] = r.get('cover')
 	return metadata
 def export_mdx(r):
 	''' Write shout r to <contentDir><slug>.mdx: r['body'] preceded by its metadata.

 	The file is written as UTF-8 and closed before returning.
 	'''
 	metadata = get_metadata(r)
 	content = frontmatter.dumps(frontmatter.Post(r['body'], **metadata))
 	# round trip through UTF-8, kept from the original implementation
 	bc = bytes(content,'utf-8').decode('utf-8','ignore')
 	# explicit encoding: the platform default may be unable to store the text
 	with open(contentDir + r['slug'] + '.mdx', 'w', encoding='utf-8') as f:
 		f.write(bc)
 def export_body(shout, storage):
 	''' Export one shout as .mdx and as raw .html under contentDir.

 	shout   -- migrated shout dict; its 'body' is replaced with the prepared html body
 	storage -- storage dict; the source entry is looked up by shout['oid']

 	Raises KeyError when the oid is unknown and Exception when the stored
 	entry is empty.
 	'''
 	entry = storage['content_items']['by_oid'][shout['oid']]
 	if not entry:
 		raise Exception('no content_items entry found')
 	shout['body'] = prepare_html_body(entry) # prepare_md_body(entry)
 	export_mdx(shout)
 	print('[export] html for %s' % shout['slug'])
 	body = extract_html(entry)
 	# written as UTF-8 and closed right away instead of leaking the handle
 	with open(contentDir + shout['slug'] + '.html', 'w', encoding='utf-8') as f:
 		f.write(body)
 def export_slug(slug, storage):
 	''' Export the shout registered under slug in storage['shouts']['by_slug'].

 	Raises AssertionError with a descriptive message when no shout is
 	stored under slug or when its first author is empty.
 	'''
 	# a single .get lookup: the former indexed lookup raised a bare KeyError
 	# before the message below could ever be shown
 	shout = storage['shouts']['by_slug'].get(slug)
 	assert shout, '[export] no shout found by slug: %s ' % slug
 	author = shout['authors'][0]
 	assert author, '[export] no author error'
 	export_body(shout, storage)
 def export_email_subscriptions():
 	''' Load migration/data/email_subscriptions.json and report how many entries it holds.

 	Nothing is migrated yet: the per-entry call is still commented out.
 	'''
 	with open('migration/data/email_subscriptions.json') as f:
 		email_subscriptions_data = json.load(f)
 	# TODO: migrate_email_subscription(data) for every entry
 	print('[migration] ' + str(len(email_subscriptions_data)) + ' email subscriptions exported')
 def export_shouts(storage):
 	''' Export every shout of storage['shouts']['by_slug'] via export_slug.

 	An empty users or shouts table is first reloaded from the json files
 	under EXPORT_DEST (authors.json / articles.json).
 	'''
 	# the tables are keyed 'by_slug' everywhere else (data_load, export_slug);
 	# the former 'by_slugs' key did not exist and raised KeyError
 	if not storage['users']['by_slug']:
 		with open(EXPORT_DEST + 'authors.json') as f:
 			storage['users']['by_slug'] = json.load(f)
 		print('[migration] ' + str(len(storage['users']['by_slug'])) + ' exported authors ')
 	if not storage['shouts']['by_slug']:
 		with open(EXPORT_DEST + 'articles.json') as f:
 			storage['shouts']['by_slug'] = json.load(f)
 		print('[migration] ' + str(len(storage['shouts']['by_slug'])) + ' exported articles ')
 	for slug in storage['shouts']['by_slug']:
 		export_slug(slug, storage)
 def _write_json(filename, payload):
 	''' Dump payload as indented, key-sorted json into EXPORT_DEST + filename. '''
 	text = json.dumps(payload,
 					  cls=DateTimeEncoder,
 					  indent=4,
 					  sort_keys=True,
 					  ensure_ascii=False)
 	# ensure_ascii=False emits non-ASCII text as is, hence the explicit encoding
 	with open(EXPORT_DEST + filename, 'w', encoding='utf-8') as f:
 		f.write(text)
 def export_json(export_articles=None, export_authors=None, export_topics=None, export_comments=None):
 	''' Write authors.json, topics.json, articles.json and comments.json under EXPORT_DEST.

 	Each argument is a dict; an omitted (or None) argument is written as
 	an empty object.
 	'''
 	export_articles = {} if export_articles is None else export_articles
 	export_authors = {} if export_authors is None else export_authors
 	export_topics = {} if export_topics is None else export_topics
 	export_comments = {} if export_comments is None else export_comments
 	_write_json('authors.json', export_authors)
 	print('[migration] ' + str(len(export_authors.items())) + ' authors exported')
 	_write_json('topics.json', export_topics)
 	print('[migration] ' + str(len(export_topics.keys())) + ' topics exported')
 	_write_json('articles.json', export_articles)
 	print('[migration] ' + str(len(export_articles.items())) + ' articles exported')
 	_write_json('comments.json', export_comments)
 	print('[migration] ' + str(len(export_comments.items())) + ' exported articles with comments')
--- a/migration/extract.py
+++ b/migration/extract.py
@@ -0,0 +1,324 @@
 import os
 import re
 import base64
 from migration.html2text import html2text
 # a span wrapped in triple slashes: group 1 is the whole ///text///, group 2 the text
 # NOTE(review): ".+" is greedy, so two tooltips on one line match as a single span
 TOOLTIP_REGEX = r'(\/\/\/(.+)\/\/\/)'
 # <directory of this file>/../../discoursio-web/content
 contentDir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'discoursio-web', 'content')
 # prefix put in front of image ids that are not absolute urls (see extract_html)
 s3 = 'https://discours-io.s3.amazonaws.com/'
 # prefix of the public links to extracted images and thumbnails
 cdn = 'https://assets.discours.io'
 def replace_tooltips(body):
 	''' Replace every ///text/// span of body with a <Tooltip text="..." /> tag.

 	Returns the new body; body is returned unchanged when nothing matches.
 	'''
 	matches = list(re.finditer(TOOLTIP_REGEX, body, re.IGNORECASE | re.MULTILINE))
 	newbody = body
 	for match in matches:
 		# replace in the accumulated result: restarting from the original
 		# body on each step kept only the last replacement
 		newbody = newbody.replace(match.group(1), '<Tooltip text="' + match.group(2) + '" />')
 	if matches:
 		print('[extract] found %d tooltips' % len(matches))
 	return newbody
 def place_tooltips(body):
 	''' Turn the text between &&& markers into <Tooltip> elements.

 	Returns (newbody, placed). The markers are always removed; tooltips
 	are only produced when the markers are balanced, i.e. splitting on
 	them yields an odd number of pieces. A piece holding a footnote-url
 	anchor gets its href moved into the link attribute.
 	'''
 	pieces = body.split('&&&')
 	total = len(pieces)
 	rebuilt = list(pieces)
 	placed = False
 	if total % 2 == 1 and total > 1:
 		print('[extract] found %d tooltips' % (total-1))
 		fn = 'a class="footnote-url" href="'
 		for idx, piece in enumerate(pieces[1:], start=1):
 			if idx % 2 == 0:
 				# plain text between two tooltips stays as it is
 				continue
 			placed = True
 			if 'a class="footnote-url" href=' not in piece:
 				rebuilt[idx] = '<Tooltip>%s</Tooltip>' % piece
 				continue
 			print('[extract] footnote: ' + piece)
 			around = piece.split(fn, 1)
 			link = around[1].split('"', 1)[0]
 			extracted_part = around[0] + ' ' + piece.split('/', 1)[-1]
 			opening = '<Tooltip' + (' link="' + link + '" ' if link else '') + '>'
 			rebuilt[idx] = opening + extracted_part + '</Tooltip>'
 	return (''.join(rebuilt), placed)
 # markdown image with an inline base64 payload:
 # group 1 = text in the square brackets, group 2 = whole data uri,
 # group 3 = png|jpeg|jpg, group 4 = the base64 text
 IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}=|[A-Za-z\d+\/]{2}==)))\)"
 # the current working directory without its last path component (computed at import time)
 parentDir = '/'.join(os.getcwd().split('/')[:-1])
 # directory the extracted image files are written under
 public = parentDir + '/discoursio-web/public'
 # base64 text -> name of the image file already written for it (shared by the extract_* functions)
 cache = {}
 def reextract_images(body, oid):
 	''' Move inline base64 images of body into files and link to them instead.

 	body -- markdown text
 	oid  -- string used as the prefix of the generated file names

 	Every image matched by IMG_REGEX is written once to
 	<public>/upload/image-<name>.<ext> (identical payloads are shared
 	through cache) and its markdown is replaced by a link below cdn.
 	Returns the rewritten body.
 	'''
 	matches = list(re.finditer(IMG_REGEX, body, re.IGNORECASE | re.MULTILINE))
 	i = 0
 	for match in matches:
 		print('[extract] image ' + match.group(1))
 		title = match.group(1) # FIXME: this is not the title
 		ext = match.group(3)
 		img = match.group(4)
 		if img not in cache:
 			name = oid + str(i)
 			content = base64.b64decode(img + '==')
 			# write below the public directory and close the file right away
 			with open(public + '/upload/image-' + name + '.' + ext, 'wb') as f:
 				f.write(content)
 			print(str(len(content)) + ' image bytes been written')
 			cache[img] = name
 			i += 1
 		else:
 			name = cache[img]
 			print('[extract] image cached ' + name)
 		# the public link is relative to the cdn root, not to the local path
 		link = '/upload/image-' + name + '.' + ext
 		# str.replace returns a new string, so keep the result; match.group(0)
 		# is the matched markdown (str(match) is only the repr of the match)
 		body = body.replace(match.group(0), '![' + title + '](' + cdn + link + ')')
 	return body
 # data uri prefix -> file extension used for the extracted image
 IMAGES = {
 	'data:image/png': 'png',
 	'data:image/jpg': 'jpg',
 	'data:image/jpeg': 'jpg',
 }
 # separator between the mime type and the payload of a data uri
 b64 = ';base64,'
 def extract_imageparts(bodyparts, prefix):
 	''' Write base64 images found in bodyparts to files and return the joined text.

 	bodyparts -- sequence of strings; presumably the body split on ';base64,'
 	             (a part is treated as an image start when it ends with one
 	             of the IMAGES keys) -- TODO confirm against the caller
 	prefix    -- string used as the prefix of the generated file names

 	Raises a bare Exception when decoding or writing an image fails.
 	'''
 	# recursive loop
 	newparts = list(bodyparts)
 	for current in bodyparts:
 		# NOTE(review): index() returns the first occurrence, so a repeated
 		# part resolves to the position of its first copy
 		i = bodyparts.index(current)
 		for mime in IMAGES.keys():
 			if mime == current[-len(mime):] and (i + 1 < len(bodyparts)):
 				print('[extract] ' + mime)
 				# the following part starts with the base64 payload, up to the first ')'
 				next = bodyparts[i+1]
 				ext = IMAGES[mime]
 				b64end = next.index(')')
 				b64encoded = next[:b64end]
 				name = prefix + '-' + str(len(cache))
 				link = '/upload/image-' + name + '.' + ext
 				print('[extract] name: ' + name)
 				print('[extract] link: ' + link)
 				print('[extract] %d bytes' % len(b64encoded))
 				if b64encoded not in cache:
 					try:
 						content = base64.b64decode(b64encoded + '==')
 						open(public + link, 'wb').write(content)
 						print('[extract] ' +str(len(content)) + ' image bytes been written')
 						cache[b64encoded] = name
 					except:
 						# NOTE(review): the original error is replaced by an Exception without message
 						raise Exception
 						# raise Exception('[extract] error decoding image %r' %b64encoded)
 				else:
 					print('[extract] cached link ' + cache[b64encoded])
 					name = cache[b64encoded]
 					link = cdn + '/upload/image-' + name + '.' + ext
 				# NOTE(review): the two slices of current add up to current itself, and
 				# next[-b64end:] / next[:-b64end] count from the end of the part -- confirm intent
 				newparts[i] = current[:-len(mime)] + current[-len(mime):] + link + next[-b64end:]
 				newparts[i+1] = next[:-b64end]
 				break
 	# NOTE(review): i is whatever the last loop iteration left behind (undefined for
 	# empty input), and the recursive call passes a concatenated value rather than a
 	# list of parts -- confirm before relying on this branch
 	return extract_imageparts(newparts[i] + newparts[i+1] + b64.join(bodyparts[i+2:]), prefix) \
 		if len(bodyparts) > (i + 1) else ''.join(newparts)
 def extract_dataimages(parts, prefix):
 	''' Replace inline base64 images by links to files written under public.

 	parts  -- the body split on 'data:image': a part ending with '](' is
 	          followed by a part holding '<ext>;base64,<payload>)<rest>'
 	prefix -- string used as the prefix of the generated file names

 	Returns the parts joined back together, with every payload replaced by
 	a link below cdn. Raises Exception when the closing ')' of a payload is
 	missing or when an image cannot be decoded or written.
 	'''
 	newparts = list(parts)
 	# enumerate instead of parts.index(part): index() returns the first
 	# occurrence, so a repeated part used to patch the wrong slot
 	for i, part in enumerate(parts):
 		if not part.endswith(']('):
 			print('[extract] dataimage skipping part ' + str(i))
 			continue
 		[ext, rest] = parts[i+1].split(b64)
 		name = prefix + '-' + str(len(cache))
 		if ext == '/jpeg': ext = 'jpg'
 		else: ext = ext.replace('/', '')
 		link = '/upload/image-' + name + '.' + ext
 		print('[extract] filename: ' + link)
 		b64end = rest.find(')')
 		if b64end == -1:
 			raise Exception('cannot find the end of base64 encoded string')
 		b64encoded = rest[:b64end]
 		print('[extract] %d text bytes' % len(b64encoded))
 		# write if not cached
 		if b64encoded not in cache:
 			try:
 				content = base64.b64decode(b64encoded + '==')
 				with open(public + link, 'wb') as f:
 					f.write(content)
 			except Exception as err:
 				# keep the cause instead of raising an empty Exception
 				raise Exception('[extract] cannot store image ' + link) from err
 			print('[extract] ' +str(len(content)) + ' image bytes')
 			cache[b64encoded] = name
 		else:
 			print('[extract] 0 image bytes, cached for ' + cache[b64encoded])
 			name = cache[b64encoded]
 		# update link with CDN
 		link = cdn + '/upload/image-' + name + '.' + ext
 		# patch newparts
 		newparts[i+1] = link + rest[b64end:]
 	return ''.join(newparts)
 # start of an inline image data uri; extract_md_images splits the body on it
 di = 'data:image'
 def extract_md_images(body, oid):
 	''' Normalize inline data-image markdown and hand the pieces to extract_dataimages.

 	Returns the rewritten body, or the normalized body when it holds no
 	data image at all.
 	'''
 	# make sure every data image is introduced by "![]("
 	normalized = body.replace('\n! []('+di, '\n ![]('+di)
 	normalized = normalized.replace('\n[]('+di, '\n![]('+di)
 	normalized = normalized.replace(' []('+di, ' ![]('+di)
 	pieces = normalized.split(di)
 	if len(pieces) > 1:
 		return extract_dataimages(pieces, oid)
 	return normalized
 def cleanup(body):
 	''' Apply a fixed sequence of character substitutions to body and return the result. '''
 	# applied strictly in this order
 	substitutions = (
 		('<', ''),
 		('>', ''),
 		('{', '('),
 		('}', ')'),
 		('…', '...'),
 		(' __ ', ' '),
 		('_ _', ' '),
 		('****', ''),
 		('\u00a0', ' '),
 		('\u02c6', '^'),
 		('\ufeff', ''),
 		('\u200b', ''),
 		('\u200c', ''),
 		# ('\u2212', '-'),
 	)
 	newbody = body
 	for old, new in substitutions:
 		newbody = newbody.replace(old, new)
 	return newbody
 def extract_md(body, oid):
 	''' Run the markdown post-processing chain: images, cleanup, tooltips.

 	An empty body is returned untouched. Raises Exception when one of the
 	steps leaves the body empty. When a tooltip was placed, the Tooltip
 	import line is put in front of the result.
 	'''
 	if not body:
 		return body
 	newbody = extract_md_images(body, oid)
 	if not newbody:
 		raise Exception('extract_images error')
 	newbody = cleanup(newbody)
 	if not newbody:
 		raise Exception('cleanup error')
 	newbody, placed = place_tooltips(newbody)
 	if not newbody:
 		raise Exception('place_tooltips error')
 	if not placed:
 		return newbody
 	return 'import Tooltip from \'$/components/Article/Tooltip\'\n\n' + newbody
 def prepare_md_body(entry):
 	''' Build the MDX body of entry: player components first, converted html after them.

 	'Video' entries get one <VideoPlayer> per youtube/vimeo media item,
 	'Music' entries one <MusicPlayer> per media item, each group preceded
 	by the matching import line.
 	'''
 	body = ''
 	kind = entry.get('type')
 	if kind == 'Video':
 		players = ''
 		for m in entry.get('media', []):
 			if 'youtubeId' in m:
 				players += '<VideoPlayer youtubeId=\'' + m['youtubeId'] + '\' />\n'
 			elif 'vimeoId' in m:
 				players += '<VideoPlayer vimeoId=\''  + m['vimeoId'] + '\' />\n'
 			else:
 				print('[extract] media is not supported')
 				print(m)
 		body = 'import VideoPlayer from \'$/components/Article/VideoPlayer\'\n\n' + players
 	elif kind == 'Music':
 		players = ''
 		for m in entry.get('media', []):
 			artist = m.get('performer')
 			trackname = artist + ' - ' if artist else ''
 			if 'title' in m:
 				trackname += m.get('title','')
 			players += '<MusicPlayer src=\"' + m.get('fileUrl','') + '\" title=\"' + trackname + '\" />\n'
 		body = 'import MusicPlayer from \'$/components/Article/MusicPlayer\'\n\n' + players
 	# the html body is converted to markdown and appended after the players
 	body_orig = extract_html(entry)
 	if body_orig:
 		body += extract_md(html2text(body_orig), entry['_id'])
 	if not body:
 		print('[extract] empty MDX body')
 	return body
 def prepare_html_body(entry):
 	''' Build the html body of entry: embedded players first, extracted html after them.

 	'Video' entries get one <iframe> per youtube/vimeo media item, 'Music'
 	entries one <figure> with an <audio> element per media item.
 	'''
 	body = ''
 	kind = entry.get('type')
 	if kind == 'Video':
 		for m in entry.get('media', []):
 			if 'youtubeId' in m:
 				body += '<iframe width="420" height="345" src="http://www.youtube.com/embed/'
 				body += m['youtubeId']
 				body += '?autoplay=1" frameborder="0" allowfullscreen></iframe>\n'
 			elif 'vimeoId' in m:
 				body += '<iframe src="https://player.vimeo.com/video/'
 				body += m['vimeoId']
 				# the quote closing the src attribute was missing
 				body += '" width="420" height="345" frameborder="0" allow="autoplay; fullscreen" allowfullscreen></iframe>'
 			else:
 				print('[extract] media is not supported')
 				print(m)
 	elif kind == 'Music':
 		for m in entry.get('media', []):
 			artist = m.get('performer')
 			trackname = ''
 			if artist: trackname += artist + ' - '
 			if 'title' in m: trackname += m.get('title','')
 			body += '<figure><figcaption>'
 			body += trackname
 			body += '</figcaption><audio controls src="'
 			body += m.get('fileUrl','')
 			body += '"></audio></figure>'
 	# append instead of assign: a plain assignment threw away the players built above
 	body += extract_html(entry)
 	if not body: print('[extract] empty HTML body')
 	return body
 def extract_html(entry):
 	''' Assemble the html body of a content item.

 	Starts from entry['body'] and appends, for every media item, an
 	optional image block ('Image' entries only) and the body of the media
 	item (each distinct body once). When the result is still empty, the
 	first non-empty 'text' of entry['bodyHistory'] is used. Returns a
 	string, possibly empty.
 	'''
 	body_orig = entry.get('body') or ''
 	media = entry.get('media', [])
 	kind = entry.get('type') or ''
 	print('[extract] kind: ' + kind)
 	# media bodies already appended, to skip repeated ones
 	mbodies = set([])
 	if media:
 		# print('[extract] media is found')
 		for m in media:
 			mbody = m.get('body', '')
 			addon = ''
 			if kind == 'Literature':
 				# literatureBody wins over body when it is present and non-empty
 				mbody = m.get('literatureBody') or m.get('body', '')
 			elif kind == 'Image':
 				# cover: the thumbnail of the entry, else its image url (cloudinary urls are dropped)
 				cover = ''
 				if 'thumborId' in entry: cover = cdn + '/unsafe/1600x/' + entry['thumborId']
 				if not cover:
 					if 'image' in entry: cover = entry['image'].get('url', '')
 					if 'cloudinary' in cover: cover = ''
 				# else: print('[extract] cover: ' + cover)
 				title = m.get('title','').replace('\n', ' ').replace('&nbsp;', ' ')
 				u = m.get('thumborId') or cover or ''
 				if title: addon += '<h4>' + title + '</h4>\n'
 				if not u.startswith('http'): u = s3 + u
 				# NOTE(review): u can no longer be empty here because the s3 prefix
 				# was just added, so this message never shows -- confirm the intended order
 				if not u: print('[extract] no image url for ' + str(m))
 				if 'cloudinary' in u: u = 'img/lost.svg'
 				# the cover image itself is only emitted for the first media item
 				if u != cover or (u == cover and media.index(m) == 0):
 					addon += '<img src=\"' + u + '\" alt=\"'+ title +'\" />\n'
 			if addon:
 				body_orig += addon
 				# print('[extract] item addon: ' + addon)
 			# if addon: print('[extract] addon: %s' % addon)
 			if mbody and mbody not in mbodies:
 				mbodies.add(mbody)
 				body_orig += mbody
 		if len(list(mbodies)) != len(media):
 			print('[extract] %d/%d media item bodies appended' % (len(list(mbodies)),len(media)))
 		# print('[extract] media items body: \n' + body_orig)
 	if not body_orig:
 		# fall back to the first non-empty text of the body history
 		for up in entry.get('bodyHistory', []) or []:
 			body_orig = up.get('text', '') or ''
 			if body_orig: 
 				print('[extract] got html body from history')
 				break
 	if not body_orig: print('[extract] empty HTML body')
 	# body_html = str(BeautifulSoup(body_orig, features="html.parser"))
 	return body_orig
--- a/migration/html2text/__init__.py
+++ b/migration/html2text/__init__.py
--- a/migration/html2text/__main__.py
+++ b/migration/html2text/__main__.py
@@ -0,0 +1,3 @@
 from .cli import main
 # run the command-line interface as soon as this module is executed
 main()
--- a/migration/html2text/cli.py
+++ b/migration/html2text/cli.py
@@ -0,0 +1,322 @@
 import argparse
 import sys
 from . import HTML2Text, __version__, config
 def main() -> None:
     """Command-line entry point: convert an HTML document to text.

     Parses the command line, reads the document from the given file (or
     from stdin when no file name or "-" is given), decodes it with the
     requested encoding, copies every option onto an HTML2Text instance
     and writes the converted text to stdout.

     Raises UnicodeDecodeError (after printing a hint) when the input
     cannot be decoded with the chosen error handling.
     """
     baseurl = ""
     # ANSI escape sequences used to colour the warning printed below
     class bcolors:
         HEADER = "\033[95m"
         OKBLUE = "\033[94m"
         OKGREEN = "\033[92m"
         WARNING = "\033[93m"
         FAIL = "\033[91m"
         ENDC = "\033[0m"
         BOLD = "\033[1m"
         UNDERLINE = "\033[4m"
     # option defaults come from the config module unless a literal is given
     p = argparse.ArgumentParser()
     p.add_argument(
         "--default-image-alt",
         dest="default_image_alt",
         default=config.DEFAULT_IMAGE_ALT,
         help="The default alt string for images with missing ones",
     )
     p.add_argument(
         "--pad-tables",
         dest="pad_tables",
         action="store_true",
         default=config.PAD_TABLES,
         help="pad the cells to equal column width in tables",
     )
     p.add_argument(
         "--no-wrap-links",
         dest="wrap_links",
         action="store_false",
         default=config.WRAP_LINKS,
         help="don't wrap links during conversion",
     )
     p.add_argument(
         "--wrap-list-items",
         dest="wrap_list_items",
         action="store_true",
         default=config.WRAP_LIST_ITEMS,
         help="wrap list items during conversion",
     )
     p.add_argument(
         "--wrap-tables",
         dest="wrap_tables",
         action="store_true",
         default=config.WRAP_TABLES,
         help="wrap tables",
     )
     p.add_argument(
         "--ignore-emphasis",
         dest="ignore_emphasis",
         action="store_true",
         default=config.IGNORE_EMPHASIS,
         help="don't include any formatting for emphasis",
     )
     p.add_argument(
         "--reference-links",
         dest="inline_links",
         action="store_false",
         default=config.INLINE_LINKS,
         help="use reference style links instead of inline links",
     )
     p.add_argument(
         "--ignore-links",
         dest="ignore_links",
         action="store_true",
         default=config.IGNORE_ANCHORS,
         help="don't include any formatting for links",
     )
     p.add_argument(
         "--ignore-mailto-links",
         action="store_true",
         dest="ignore_mailto_links",
         default=config.IGNORE_MAILTO_LINKS,
         help="don't include mailto: links",
     )
     p.add_argument(
         "--protect-links",
         dest="protect_links",
         action="store_true",
         default=config.PROTECT_LINKS,
         help="protect links from line breaks surrounding them with angle brackets",
     )
     p.add_argument(
         "--ignore-images",
         dest="ignore_images",
         action="store_true",
         default=config.IGNORE_IMAGES,
         help="don't include any formatting for images",
     )
     p.add_argument(
         "--images-as-html",
         dest="images_as_html",
         action="store_true",
         default=config.IMAGES_AS_HTML,
         help=(
             "Always write image tags as raw html; preserves `height`, `width` and "
             "`alt` if possible."
         ),
     )
     p.add_argument(
         "--images-to-alt",
         dest="images_to_alt",
         action="store_true",
         default=config.IMAGES_TO_ALT,
         help="Discard image data, only keep alt text",
     )
     p.add_argument(
         "--images-with-size",
         dest="images_with_size",
         action="store_true",
         default=config.IMAGES_WITH_SIZE,
         help=(
             "Write image tags with height and width attrs as raw html to retain "
             "dimensions"
         ),
     )
     p.add_argument(
         "-g",
         "--google-doc",
         action="store_true",
         dest="google_doc",
         default=False,
         help="convert an html-exported Google Document",
     )
     p.add_argument(
         "-d",
         "--dash-unordered-list",
         action="store_true",
         dest="ul_style_dash",
         default=False,
         help="use a dash rather than a star for unordered list items",
     )
     p.add_argument(
         "-e",
         "--asterisk-emphasis",
         action="store_true",
         dest="em_style_asterisk",
         default=False,
         help="use an asterisk rather than an underscore for emphasized text",
     )
     p.add_argument(
         "-b",
         "--body-width",
         dest="body_width",
         type=int,
         default=config.BODY_WIDTH,
         help="number of characters per output line, 0 for no wrap",
     )
     p.add_argument(
         "-i",
         "--google-list-indent",
         dest="list_indent",
         type=int,
         default=config.GOOGLE_LIST_INDENT,
         help="number of pixels Google indents nested lists",
     )
     p.add_argument(
         "-s",
         "--hide-strikethrough",
         action="store_true",
         dest="hide_strikethrough",
         default=False,
         help="hide strike-through text. only relevant when -g is " "specified as well",
     )
     p.add_argument(
         "--escape-all",
         action="store_true",
         dest="escape_snob",
         default=False,
         help=(
             "Escape all special characters.  Output is less readable, but avoids "
             "corner case formatting issues."
         ),
     )
     p.add_argument(
         "--bypass-tables",
         action="store_true",
         dest="bypass_tables",
         default=config.BYPASS_TABLES,
         help="Format tables in HTML rather than Markdown syntax.",
     )
     p.add_argument(
         "--ignore-tables",
         action="store_true",
         dest="ignore_tables",
         default=config.IGNORE_TABLES,
         help="Ignore table-related tags (table, th, td, tr) " "while keeping rows.",
     )
     p.add_argument(
         "--single-line-break",
         action="store_true",
         dest="single_line_break",
         default=config.SINGLE_LINE_BREAK,
         help=(
             "Use a single line break after a block element rather than two line "
             "breaks. NOTE: Requires --body-width=0"
         ),
     )
     p.add_argument(
         "--unicode-snob",
         action="store_true",
         dest="unicode_snob",
         default=config.UNICODE_SNOB,
         help="Use unicode throughout document",
     )
     p.add_argument(
         "--no-automatic-links",
         action="store_false",
         dest="use_automatic_links",
         default=config.USE_AUTOMATIC_LINKS,
         help="Do not use automatic links wherever applicable",
     )
     p.add_argument(
         "--no-skip-internal-links",
         action="store_false",
         dest="skip_internal_links",
         default=config.SKIP_INTERNAL_LINKS,
         help="Do not skip internal links",
     )
     p.add_argument(
         "--links-after-para",
         action="store_true",
         dest="links_each_paragraph",
         default=config.LINKS_EACH_PARAGRAPH,
         help="Put links after each paragraph instead of document",
     )
     p.add_argument(
         "--mark-code",
         action="store_true",
         dest="mark_code",
         default=config.MARK_CODE,
         help="Mark program code blocks with [code]...[/code]",
     )
     p.add_argument(
         "--decode-errors",
         dest="decode_errors",
         default=config.DECODE_ERRORS,
         help=(
             "What to do in case of decode errors.'ignore', 'strict' and 'replace' are "
             "acceptable values"
         ),
     )
     p.add_argument(
         "--open-quote",
         dest="open_quote",
         default=config.OPEN_QUOTE,
         help="The character used to open quotes",
     )
     p.add_argument(
         "--close-quote",
         dest="close_quote",
         default=config.CLOSE_QUOTE,
         help="The character used to close quotes",
     )
     p.add_argument(
         "--version", action="version", version=".".join(map(str, __version__))
     )
     # optional positionals: input file ("-" or nothing means stdin) and its encoding
     p.add_argument("filename", nargs="?")
     p.add_argument("encoding", nargs="?", default="utf-8")
     args = p.parse_args()
     # the input is read as bytes and decoded explicitly below
     if args.filename and args.filename != "-":
         with open(args.filename, "rb") as fp:
             data = fp.read()
     else:
         data = sys.stdin.buffer.read()
     try:
         html = data.decode(args.encoding, args.decode_errors)
     except UnicodeDecodeError as err:
         # point the user at the flag that relaxes decoding, then re-raise
         warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
         warning += " Use the " + bcolors.OKGREEN
         warning += "--decode-errors=ignore" + bcolors.ENDC + " flag."
         print(warning)
         raise err
     h = HTML2Text(baseurl=baseurl)
     # handle options
     if args.ul_style_dash:
         h.ul_item_mark = "-"
     if args.em_style_asterisk:
         # asterisk for emphasis; strong text then uses double underscores
         h.emphasis_mark = "*"
         h.strong_mark = "__"
     # the remaining options are copied onto the converter one by one
     h.body_width = args.body_width
     h.google_list_indent = args.list_indent
     h.ignore_emphasis = args.ignore_emphasis
     h.ignore_links = args.ignore_links
     h.ignore_mailto_links = args.ignore_mailto_links
     h.protect_links = args.protect_links
     h.ignore_images = args.ignore_images
     h.images_as_html = args.images_as_html
     h.images_to_alt = args.images_to_alt
     h.images_with_size = args.images_with_size
     h.google_doc = args.google_doc
     h.hide_strikethrough = args.hide_strikethrough
     h.escape_snob = args.escape_snob
     h.bypass_tables = args.bypass_tables
     h.ignore_tables = args.ignore_tables
     h.single_line_break = args.single_line_break
     h.inline_links = args.inline_links
     h.unicode_snob = args.unicode_snob
     h.use_automatic_links = args.use_automatic_links
     h.skip_internal_links = args.skip_internal_links
     h.links_each_paragraph = args.links_each_paragraph
     h.mark_code = args.mark_code
     h.wrap_links = args.wrap_links
     h.wrap_list_items = args.wrap_list_items
     h.wrap_tables = args.wrap_tables
     h.pad_tables = args.pad_tables
     h.default_image_alt = args.default_image_alt
     h.open_quote = args.open_quote
     h.close_quote = args.close_quote
     sys.stdout.write(h.handle(html))
--- a/migration/html2text/config.py
+++ b/migration/html2text/config.py
@ -0,0 +1,164 @@
 import re
 # Module-level defaults for the HTML-to-Markdown converter. The CLI entry
 # point assigns same-named options onto the HTML2Text instance; presumably
 # these constants are the defaults for those options -- confirm in cli.py.
 # Use Unicode characters instead of their ascii pseudo-replacements
 UNICODE_SNOB = True
 # Marker to use for marking tables for padding post processing
 TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
 # Escape all special characters.  Output is less readable, but avoids
 # corner case formatting issues.
 ESCAPE_SNOB = True
 # Put the links after each paragraph instead of at the end.
 LINKS_EACH_PARAGRAPH = False
 # Wrap long lines at position. 0 for no wrapping.
 BODY_WIDTH = 0
 # Don't show internal links (href="#local-anchor") -- corresponding link
 # targets won't be visible in the plain text file anyway.
 SKIP_INTERNAL_LINKS = False
 # Use inline, rather than reference, formatting for images and links
 INLINE_LINKS = True
 # Protect links from line breaks surrounding them with angle brackets (in
 # addition to their square brackets)
 PROTECT_LINKS = True
 # Allow wrapping of paragraphs that contain links (see utils.skipwrap, which
 # refuses to wrap such paragraphs when this is off).
 WRAP_LINKS = True
 # Wrap list items.
 WRAP_LIST_ITEMS = False
 # Wrap tables
 WRAP_TABLES = False
 # Number of pixels Google indents nested lists
 GOOGLE_LIST_INDENT = 36
 # Values Google and others may use to indicate bold text
 BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900")
 # Feature toggles for anchors, mailto links, images and emphasis.
 # NOTE(review): exact semantics live in the converter class, which is not
 # part of this module -- confirm there before changing a default.
 IGNORE_ANCHORS = False
 IGNORE_MAILTO_LINKS = False
 IGNORE_IMAGES = False
 IMAGES_AS_HTML = False
 IMAGES_TO_ALT = False
 IMAGES_WITH_SIZE = False
 IGNORE_EMPHASIS = False
 MARK_CODE = True
 # Error handler name; presumably passed to bytes.decode() as the CLI does
 # with its --decode-errors option -- TODO confirm.
 DECODE_ERRORS = "strict"
 DEFAULT_IMAGE_ALT = ""
 PAD_TABLES = True
 # Convert links with same href and text to <href> format
 # if they are absolute links
 USE_AUTOMATIC_LINKS = True
 # For checking space-only lines (used by the converter's line-wrapping code).
 # NOTE(review): as written the pattern matches one whitespace character
 # followed by a literal "+", not a run of whitespace -- confirm whether
 # r"\s+" was intended before relying on it.
 RE_SPACE = re.compile(r"\s\+")
 # Digits, a dot, then whitespace: the start of an ordered list item.
 RE_ORDERED_LIST_MATCHER = re.compile(r"\d+\.\s")
 # "-", "*" or "+" followed by whitespace: the start of a bullet list item.
 RE_UNORDERED_LIST_MATCHER = re.compile(r"[-\*\+]\s")
 # Backslash, square brackets and parentheses (captured for escaping).
 RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
 # Wider set of Markdown-significant characters (captured for escaping).
 RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")
 # to find links in the text
 RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")
 # to find table separators
 RE_TABLE = re.compile(r" \| ")
 # Line-start "<number>." followed by whitespace; group 1 is the indentation
 # plus number, group 2 the dot (so the dot can be escaped on its own).
 RE_MD_DOT_MATCHER = re.compile(
    r"""
    ^             # start of line
    (\s*\d+)      # optional whitespace and a number
    (\.)          # dot
    (?=\s)        # lookahead assert whitespace
    """,
    re.MULTILINE | re.VERBOSE,
 )
 # Line-start "+" followed by whitespace; group 1 is the indentation,
 # group 2 the plus sign.
 RE_MD_PLUS_MATCHER = re.compile(
    r"""
    ^
    (\s*)
    (\+)
    (?=\s)
    """,
    flags=re.MULTILINE | re.VERBOSE,
 )
 # Line-start "-" followed by whitespace or another dash; group 1 is the
 # indentation, group 2 the dash.
 RE_MD_DASH_MATCHER = re.compile(
    r"""
    ^
    (\s*)
    (-)
    (?=\s|\-)     # followed by whitespace (bullet list, or spaced out hr)
                  # or another dash (header or hr)
    """,
    flags=re.MULTILINE | re.VERBOSE,
 )
 # Characters that, when preceded by a backslash, form a Markdown escape.
 RE_SLASH_CHARS = r"\`*_{}[]()#+-.!"
 # A backslash immediately followed by one of RE_SLASH_CHARS; only the
 # backslash is captured, the following character is a lookahead.
 RE_MD_BACKSLASH_MATCHER = re.compile(
    r"""
    (\\)          # match one slash
    (?=[%s])      # followed by a char that requires escaping
    """
    % re.escape(RE_SLASH_CHARS),
    flags=re.VERBOSE,
 )
 # Maps HTML entity names to plain-text replacements. utils.unifiable_n
 # re-keys this table by code point (skipping "nbsp").
 UNIFIABLE = {
    "rsquo": "'",
    "lsquo": "'",
    "rdquo": '"',
    "ldquo": '"',
    "copy": "(C)",
    "mdash": "--",
    "nbsp": " ",
    "rarr": "->",
    "larr": "<-",
    "middot": "*",
    "ndash": "-",
    "oelig": "oe",
    "aelig": "ae",
    "agrave": "a",
    "aacute": "a",
    "acirc": "a",
    "atilde": "a",
    "auml": "a",
    "aring": "a",
    "egrave": "e",
    "eacute": "e",
    "ecirc": "e",
    "euml": "e",
    "igrave": "i",
    "iacute": "i",
    "icirc": "i",
    "iuml": "i",
    "ograve": "o",
    "oacute": "o",
    "ocirc": "o",
    "otilde": "o",
    "ouml": "o",
    "ugrave": "u",
    "uacute": "u",
    "ucirc": "u",
    "uuml": "u",
    # directional marks (left-to-right / right-to-left) are dropped entirely
    "lrm": "",
    "rlm": "",
 }
 # Format tables in HTML rather than Markdown syntax
 BYPASS_TABLES = False
 # Ignore table-related tags (table, th, td, tr) while keeping rows
 IGNORE_TABLES = False
 # Use a single line break after a block element rather than two line breaks.
 # NOTE: Requires body width setting to be 0.
 SINGLE_LINE_BREAK = False
 # Use double quotation marks when converting the <q> tag.
 OPEN_QUOTE = '"'
 CLOSE_QUOTE = '"'
--- a/migration/html2text/elements.py
+++ b/migration/html2text/elements.py
@ -0,0 +1,18 @@
 from typing import Dict, Optional
 class AnchorElement:
    """Slotted record holding an attribute mapping and two integer counters."""
    __slots__ = ["attrs", "count", "outcount"]
    def __init__(self, attrs: Dict[str, Optional[str]], count: int, outcount: int):
        # Store the three constructor arguments unchanged in their slots.
        self.attrs, self.count, self.outcount = attrs, count, outcount
 class ListElement:
    """Slotted record pairing a name string with a running number."""
    __slots__ = ["name", "num"]
    def __init__(self, name: str, num: int):
        # Store both constructor arguments unchanged in their slots.
        self.name, self.num = name, num
--- a/migration/html2text/py.typed
+++ b/migration/html2text/py.typed
--- a/migration/html2text/typing.py
+++ b/migration/html2text/typing.py
@ -0,0 +1,3 @@
 class OutCallback:
    """Typing stub for a callable that accepts one string and returns nothing."""
    def __call__(self, s: str) -> None:
        """Accept ``s``; this stub performs no work."""
        return None
--- a/migration/html2text/utils.py
+++ b/migration/html2text/utils.py
@ -0,0 +1,290 @@
 import html.entities
 from typing import Dict, List, Optional
 from . import config
 # config.UNIFIABLE re-keyed by Unicode code point instead of entity name;
 # the "nbsp" entry is deliberately left out.
 unifiable_n = dict(
    (html.entities.name2codepoint[entity], replacement)
    for entity, replacement in config.UNIFIABLE.items()
    if entity != "nbsp"
 )
 def hn(tag: str) -> int:
    """
    Return the heading level encoded in an HTML tag name.
    :param tag: tag name such as "h1" or "div"
    :returns: 1-9 for "h1".."h9"; 0 for anything else, including "h0",
    longer names such as "h10", and the empty string
    """
    # Test the length first so an empty tag name cannot raise IndexError.
    if len(tag) == 2 and tag[0] == "h":
        level = tag[1]
        # String comparison: only the characters "1".."9" fall in this range.
        if "0" < level <= "9":
            return int(level)
    return 0
 def dumb_property_dict(style: str) -> Dict[str, str]:
    """
    Parse a ";"-separated CSS declaration string.
    :returns: A dict of lower-cased, stripped property names to lower-cased,
    stripped values; declarations without a ":" are skipped
    """
    properties = {}
    for declaration in style.split(";"):
        if ":" not in declaration:
            continue
        # Split on the first colon only, so values may contain colons.
        name, _, value = declaration.partition(":")
        properties[name.strip().lower()] = value.strip().lower()
    return properties
 def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
    """
    Parse a stylesheet into selector -> property-dict form.
    :type data: str
    :returns: A dict of css selectors, each mapped to the dict of its css
    attributes; empty when a rule cannot be split into selector and body
    :rtype: dict
    """
    # A trailing ";" guarantees every @import statement has a terminator.
    css = data + ";"
    # Cut out "@import ...;" statements one at a time.
    start = css.find("@import")
    while start >= 0:
        end = css.find(";", start)
        css = css[:start] + css[end + 1 :]
        start = css.find("@import")
    # Each "selector { body" chunk becomes a [selector, body] pair.
    rules = [chunk.split("{") for chunk in css.split("}") if "{" in chunk.strip()]
    try:
        return {selector.strip(): dumb_property_dict(body) for selector, body in rules}
    except ValueError:
        # A chunk with more than one "{" cannot be unpacked; give up quietly.
        return {}
 def element_style(
    attrs: Dict[str, Optional[str]],
    style_def: Dict[str, Dict[str, str]],
    parent_style: Dict[str, str],
 ) -> Dict[str, str]:
    """
    Compute an element's effective style from inherited, class and inline css.
    :type attrs: dict
    :type style_def: dict
    :type parent_style: dict
    :returns: A dict of the 'final' style attributes of the element; the
    parent's dict is copied, not modified
    :rtype: dict
    """
    merged = dict(parent_style)
    if "class" in attrs:
        class_names = attrs["class"]
        assert class_names is not None
        # Later classes override earlier ones; selectors are looked up as ".name".
        for class_name in class_names.split():
            merged.update(style_def.get("." + class_name, {}))
    if "style" in attrs:
        inline = attrs["style"]
        assert inline is not None
        # Inline declarations win over class-based ones.
        merged.update(dumb_property_dict(inline))
    return merged
 def google_list_style(style: Dict[str, str]) -> str:
    """
    Decide whether a list is ordered or unordered from its css.
    :type style: dict
    :returns: "ul" when list-style-type is disc, circle, square or none;
    "ol" otherwise (including when the property is absent)
    :rtype: str
    """
    bullet_kinds = ("disc", "circle", "square", "none")
    if style.get("list-style-type") in bullet_kinds:
        return "ul"
    return "ol"
 def google_has_height(style: Dict[str, str]) -> bool:
    """
    Tell whether the element's style explicitly defines 'height'.
    :type style: dict
    :rtype: bool
    """
    height_declared = "height" in style
    return height_declared
 def google_text_emphasis(style: Dict[str, str]) -> List[str]:
    """
    Collect the emphasis-related css values of an element.
    :type style: dict
    :returns: The values of text-decoration, font-style and font-weight,
    in that order, for whichever of them are present
    :rtype: list
    """
    emphasis_properties = ("text-decoration", "font-style", "font-weight")
    return [style[prop] for prop in emphasis_properties if prop in style]
 def google_fixed_width_font(style: Dict[str, str]) -> bool:
    """
    Tell whether the element's css selects a fixed width font.
    Only the exact values "courier new" and "consolas" count.
    :type style: dict
    :rtype: bool
    """
    family = style.get("font-family", "")
    return family in ("courier new", "consolas")
 def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
    """
    Extract the numbering offset from a list element's attributes.
    :type attrs: dict
    :returns: The "start" attribute minus one, or 0 when the attribute is
    absent or not an integer
    :rtype: int
    """
    if "start" not in attrs:
        return 0
    raw_start = attrs["start"]
    assert raw_start is not None
    try:
        first_number = int(raw_start)
    except ValueError:
        # Non-numeric start values fall back to default numbering.
        return 0
    return first_number - 1
 def skipwrap(
    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
 ) -> bool:
    """
    Decide whether a paragraph must be left unwrapped.
    :returns: True when the paragraph should NOT be wrapped
    """
    # A paragraph that appears to contain a link is left alone unless link
    # wrapping was requested.
    if not wrap_links and config.RE_LINK.search(para):
        return True
    # Four leading spaces or a leading tab mark a code block; never wrap it.
    if para[0:4] == "    " or para[0] == "\t":
        return True
    body = para.lstrip()
    # Exactly two leading dashes (optionally indented) are an em dash, so
    # the paragraph is ordinary text and may be wrapped.
    if body.startswith("--") and len(body) > 2 and body[2] != "-":
        return False
    # A leading "-" or "*" (but not "**") follows the list-item setting.
    # NOTE(review): upstream is unsure of this rule's full purpose; a
    # <br>-inside-<span> test case is said to depend on it as well.
    if body[:1] in ("-", "*") and body[:2] != "**":
        return not wrap_list_items
    # " | " suggests a table row.
    if not wrap_tables and config.RE_TABLE.search(para):
        return True
    # Remaining list shapes: "<bullet> " or "<number>. " after optional
    # indentation; these are never wrapped.
    ordered = config.RE_ORDERED_LIST_MATCHER.match(body)
    return bool(ordered or config.RE_UNORDERED_LIST_MATCHER.match(body))
 def escape_md(text: str) -> str:
    """
    Backslash-escape every character matched by config.RE_MD_CHARS_MATCHER,
    for text placed inside other markdown constructs.
    """
    matcher = config.RE_MD_CHARS_MATCHER
    return matcher.sub(r"\\\1", text)
 def escape_md_section(text: str, snob: bool = False) -> str:
    """
    Escape markdown-sensitive characters across a whole document section.
    :param snob: when true, additionally escape everything matched by
    config.RE_MD_CHARS_MATCHER_ALL
    """
    escaped = config.RE_MD_BACKSLASH_MATCHER.sub(r"\\\1", text)
    if snob:
        escaped = config.RE_MD_CHARS_MATCHER_ALL.sub(r"\\\1", escaped)
    # Line-start markers (numbered dot, plus, dash) keep their prefix and
    # receive a backslash before the marker; order matches the original.
    line_start_matchers = (
        config.RE_MD_DOT_MATCHER,
        config.RE_MD_PLUS_MATCHER,
        config.RE_MD_DASH_MATCHER,
    )
    for matcher in line_start_matchers:
        escaped = matcher.sub(r"\1\\\2", escaped)
    return escaped
 def reformat_table(lines: List[str], right_margin: int) -> List[str]:
    """
    Pad the cells of a "|"-delimited table so its columns line up.
    :param lines: raw table rows, cells separated by "|"
    :param right_margin: filler characters added after the widest cell of
    each column
    :returns: the re-padded rows, one per input line; an empty list when
    ``lines`` is empty
    """
    # An empty table has nothing to align (indexing lines[0] would raise).
    if not lines:
        return []
    # Split every row once; both passes below work on the stripped cells.
    rows = [[cell.rstrip() for cell in line.split("|")] for line in lines]
    # find the maximum width of the columns
    max_width = [len(cell) + right_margin for cell in rows[0]]
    for cols in rows:
        extra = len(cols) - len(max_width)
        if extra > 0:
            # don't drop any data if colspan attributes result in unequal
            # lengths: a wider row introduces new columns
            max_width += [len(cell) + right_margin for cell in cols[-extra:]]
        # A shorter row is padded with empty cells so zip() keeps every
        # column; a negative multiplier yields an empty list.
        padded = cols + [""] * -extra
        max_width = [
            max(len(cell) + right_margin, old_len)
            for cell, old_len in zip(padded, max_width)
        ]
    # reformat
    new_lines = []
    for line, cols in zip(lines, rows):
        # A separator row consists solely of dashes and pipes and is
        # filled with dashes; every other row is filled with spaces.
        filler = "-" if set(line.strip()) == set("-|") else " "
        new_cols = [cell.ljust(width, filler) for cell, width in zip(cols, max_width)]
        new_lines.append("|" + filler + "|".join(new_cols) + "|")
    return new_lines
 def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
    """
    Re-pad every table found between config.TABLE_MARKER_FOR_PAD lines.
    Marker lines themselves are dropped; each finished table is followed by
    one empty line. Rows of a table whose closing marker never appears are
    not emitted.
    """
    output = []  # type: List[str]
    pending_rows = []  # type: List[str]
    inside_table = False
    for row in text.split("\n"):
        if config.TABLE_MARKER_FOR_PAD not in row:
            # Ordinary line: buffer it while inside a table, else pass through.
            (pending_rows if inside_table else output).append(row)
            continue
        # Marker line: toggles the table state and is never emitted.
        inside_table = not inside_table
        if inside_table:
            continue
        # Closing marker: flush the buffered rows as a padded table.
        output.extend(reformat_table(pending_rows, right_margin))
        pending_rows = []
        output.append("")
    return "\n".join(output)
--- a/migration/tables/init.py
+++ b/migration/tables/init.py
@ -0,0 +1 @@
 # Names exported by a star-import of this package. This must be a plain
 # list of strings: the former trailing comma turned it into a 1-tuple
 # wrapping the list, which star-import rejects.
 # NOTE(review): "tags" is listed, while migrate.py imports
 # migration.tables.topics -- confirm which submodule name is correct.
 __all__ = ["users", "tags", "content_items", "comments"]
--- a/migration/tables/comments.py
+++ b/migration/tables/comments.py
@ -0,0 +1,108 @@
 from datetime import datetime
 from dateutil.parser import parse as date_parse
 from orm import Reaction, User
 from orm import reaction
 from orm.base import local_session
 from migration.html2text import html2text
 from orm.reaction import ReactionKind
 from orm.shout import Shout
 # Timestamp captured once, when this module is imported.
 # NOTE(review): no executable code in this module reads it -- confirm
 # whether it is still needed.
 ts = datetime.now()
 def migrate(entry, storage):
 	'''
 	Migrate one legacy comment into a COMMENT Reaction, plus one
 	LIKE/DISLIKE Reaction for every item of its "ratings" list.
 	entry: legacy comment document, e.g.
 	{
 	  "_id": "hdtwS8fSyFLxXCgSC",
 	  "body": "<p>",
 	  "contentItem": "mnK8KsJHPRi8DrybQ",
 	  "createdBy": "bMFPuyNg6qAD2mhXe",
 	  "thread": "01/",
 	  "createdAt": "2016-04-19 04:33:53+00:00",
 	  "ratings": [
 		{ "createdBy": "AqmRukvRiExNpAe8C", "value": 1 },
 		{ "createdBy": "YdE76Wth3yqymKEu5", "value": 1 }
 	  ],
 	  "rating": 2,
 	  "updatedAt": "2020-05-27 19:22:57.091000+00:00",
 	  "updatedBy": "0"
 	}
 	->
 	type Reaction {
 		id: Int!
 		shout: Shout!
 		createdAt: DateTime!
 		createdBy: User!
 		updatedAt: DateTime
 		deletedAt: DateTime
 		deletedBy: User
 		range: String # full / 0:2340
 		kind: ReactionKind!
 		body: String
 		replyTo: Reaction
 		stat: Stat
 		old_id: String
 		old_thread: String
 		}
 	storage: migration state; storage['shouts']['by_oid'] is read to find
 	the already migrated shout the comment belongs to.
 	Returns the legacy shout oid when that shout is missing while other
 	shouts exist, the created Reaction on success, or None when the stored
 	shout record is empty.
 	Raises Exception when no shouts have been migrated at all, and
 	re-raises any error from creating a rating reaction.
 	'''
 	reaction_dict = {}
 	# FIXME: decide whether comments without 'createdAt' should fall back to the module-level ts
 	reaction_dict['body'] = html2text(entry.get('body', ''))
 	reaction_dict['oid'] = entry['_id']
 	if entry.get('createdAt'): reaction_dict['createdAt'] = date_parse(entry.get('createdAt'))
 	shout_oid = entry.get('contentItem')
 	if shout_oid not in storage['shouts']['by_oid']:
 		if len(storage['shouts']['by_oid']) > 0:
 			# the caller receives the unresolved shout oid instead of a Reaction
 			return shout_oid
 		print('[migration] no shouts migrated yet')
 		raise Exception('[migration] no shouts migrated yet')
 	# Bound before use: the name is assigned below only when the shout record
 	# is non-empty, and the final return would otherwise hit an unassigned local.
 	reaction = None
 	with local_session() as session:
 		author = session.query(User).filter(User.oid == entry['createdBy']).first()
 		shout_dict = storage['shouts']['by_oid'][shout_oid]
 		if shout_dict:
 			reaction_dict['shout'] = shout_dict['slug']
 			# comments of unknown authors are attributed to the 'discours' user
 			reaction_dict['createdBy'] = author.slug if author else 'discours'
 			reaction_dict['kind'] = ReactionKind.COMMENT
 			# creating reaction from old comment
 			reaction = Reaction.create(**reaction_dict)
 			reaction_dict['id'] = reaction.id
 			for comment_rating_old in entry.get('ratings',[]):
 				rater = session.query(User).filter(User.oid == comment_rating_old['createdBy']).first()
 				# unknown raters fall back to the 'noname' user, then to 'discours'
 				reactedBy = rater if rater else session.query(User).filter(User.slug == 'noname').first()
 				re_reaction_dict = {
 					'shout': reaction_dict['shout'],
 					'replyTo': reaction.id,
 					'kind': ReactionKind.LIKE if comment_rating_old['value'] > 0 else ReactionKind.DISLIKE,
 					'createdBy': reactedBy.slug if reactedBy else 'discours'
 				}
 				cts = comment_rating_old.get('createdAt')
 				if cts: re_reaction_dict['createdAt'] = date_parse(cts)
 				try:
 					# creating reaction from old rating
 					Reaction.create(**re_reaction_dict)
 				except Exception:
 					print('[migration] comment rating error: %r' % re_reaction_dict)
 					raise
 		else:
 			print('[migration] error: cannot find shout for comment %r' % reaction_dict)
 	return reaction
 def migrate_2stage(rr, old_new_id):
 	'''
 	Second pass for replies: point a migrated reaction at its parent.
 	rr: dict with 'replyTo', 'oid' and 'body' keys
 	old_new_id: mapping of legacy oids to new Reaction ids
 	Does nothing when rr has no 'replyTo' or its 'oid' has no new id.
 	Raises Exception(rr) after the update when rr['body'] is empty.
 	'''
 	parent_oid = rr.get('replyTo')
 	if parent_oid:
 		reaction_id = old_new_id.get(rr.get('oid'))
 		if reaction_id:
 			with local_session() as session:
 				reply = session.query(Reaction).filter(Reaction.id == reaction_id).first()
 				reply.replyTo = old_new_id.get(parent_oid)
 				reply.save()
 				session.commit()
 			# checked only after the link has been stored
 			if not rr['body']:
 				raise Exception(rr)
--- a/migration/tables/content_items.py
+++ b/migration/tables/content_items.py
@ -0,0 +1,226 @@
 from dateutil.parser import parse as date_parse
 import sqlalchemy
 from orm.shout import Shout, ShoutTopic, User
 from storages.viewed import ViewedByDay
 from transliterate import translit
 from datetime import datetime
 from orm.base import local_session
 from migration.extract import prepare_html_body
 from orm.community import Community
 from orm.reaction import Reaction, ReactionKind
 # Fallback timestamp string used by migrate() when a legacy content item
 # has no 'createdAt' / 'publishedAt'.
 OLD_DATE = '2016-03-05 22:22:00.350000'
 # Captured once at import time; migrate() uses it for a missing 'updatedAt'
 # and for the timestamps of authors synthesized from entry['application'].
 ts = datetime.now()
 # Maps the legacy entry['type'] to the value stored as the shout's 'layout'.
 type2layout = {
 	'Article': 'article',
 	'Literature': 'prose',
 	'Music': 'music',
 	'Video': 'video',
 	'Image': 'image'
 }
 def get_shout_slug(entry):
 	'''
 	Return the entry's own 'slug', or else the first non-empty 'slug'
 	among its 'friendlySlugs'; the result is falsy when neither exists.
 	'''
 	slug = entry.get('slug', '')
 	if slug:
 		return slug
 	for friendly in entry.get('friendlySlugs', []):
 		slug = friendly.get('slug', '')
 		if slug:
 			return slug
 	# nothing usable: hand back the last (falsy) value that was seen
 	return slug
 def migrate(entry, storage):
 	'''
 	Migrate one legacy content item into a Shout together with its author,
 	topic links, rating reactions and view counter.
 	entry: legacy content item dict; 'topics', 'cover' and 'authors' are
 	written back into it before the body is prepared.
 	storage: migration state; reads storage['topics']['by_oid'],
 	storage['users']['by_oid'] and storage['replacements'], registers the
 	result in storage['shouts'] and a newly created author in storage['users'].
 	Returns the shout dict (the Shout fields plus 'topics' and 'oid').
 	Raises Exception when the entry has no slug, when the shout cannot be
 	created, or when a rating cannot be converted; AssertionError when no
 	author could be resolved; IntegrityError when the author cannot be created.
 	'''
 	# init, set title and layout
 	r = {
 		'layout': type2layout[entry['type']],
 		'title': entry['title'],
 		'community': Community.default_community.id,
 		'authors': [],
 		'topics': set([]),
 		# 'rating': 0,
 		# 'ratings': [],
 		'createdAt': []
 	}
 	topics_by_oid = storage['topics']['by_oid']
 	users_by_oid = storage['users']['by_oid']
 	# author
 	oid = entry.get('createdBy', entry.get('_id', entry.get('oid')))
 	userdata = users_by_oid.get(oid)
 	if not userdata:
 		app = entry.get('application')
 		if app:
 			# no migrated user: synthesize one from the embedded application form
 			userslug = translit(app['name'], 'ru', reversed=True)\
 				.replace(' ', '-')\
 				.replace('\'', '')\
 				.replace('.', '-').lower()
 			userdata = {
 				'username': app['email'],
 				'email': app['email'],
 				'name': app['name'],
 				'bio': app.get('bio', ''),
 				'emailConfirmed': False,
 				'slug': userslug,
 				'createdAt': ts,
 				'wasOnlineAt': ts
 			}
 		else:
 			userdata = User.default_user.dict()
 	assert userdata, 'no user found for %s from ' % [oid, len(users_by_oid.keys())]
 	r['authors'] = [userdata, ]
 	# slug
 	slug = get_shout_slug(entry)
 	if not slug: raise Exception('[migration] content item %r has no slug' % entry.get('_id'))
 	r['slug'] = slug
 	# cover
 	c = ''
 	if entry.get('thumborId'):
 		c = 'https://assets.discours.io/unsafe/1600x/' + entry['thumborId']
 	else:
 		c = entry.get('image', {}).get('url')
 		if not c or 'cloudinary' in c: c = ''
 	r['cover'] = c
 	# timestamps
 	r['createdAt'] = date_parse(entry.get('createdAt', OLD_DATE))
 	r['updatedAt'] = date_parse(entry['updatedAt']) if 'updatedAt' in entry else ts
 	if entry.get('published'):
 		r['publishedAt'] = date_parse(entry.get('publishedAt', OLD_DATE))
 		# compare parsed values: a datetime never equals the raw OLD_DATE string,
 		# so the fallback to ts used to be dead code
 		if r['publishedAt'] == date_parse(OLD_DATE): r['publishedAt'] = ts
 	if 'deletedAt' in entry: r['deletedAt'] = date_parse(entry['deletedAt'])
 	# topics
 	category = entry['category']
 	mainTopic = topics_by_oid.get(category)
 	if mainTopic:
 		r['mainTopic'] = storage['replacements'].get(mainTopic["slug"], mainTopic["slug"])
 	topic_oids = [category, ]
 	topic_oids.extend(entry.get('tags', []))
 	for topic_oid in topic_oids:
 		if topic_oid in topics_by_oid:
 			r['topics'].add(topics_by_oid[topic_oid]['slug'])
 		else:
 			print('[migration] unknown old topic id: ' + str(topic_oid))
 	r['topics'] = list(r['topics'])
 	# prepare_html_body reads these back from the entry
 	# NOTE(review): presumed from the write-back order -- confirm in migration.extract
 	entry['topics'] = r['topics']
 	entry['cover'] = r['cover']
 	entry['authors'] = r['authors']
 	# body
 	r['body'] = prepare_html_body(entry)
 	# save shout to db
 	s = object()
 	shout_dict = r.copy()
 	user = None
 	del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
 	#del shout_dict['rating'] # FIXME: TypeError: 'rating' is an invalid keyword argument for Shout
 	#del shout_dict['ratings']
 	email = userdata.get('email')
 	# kept separate from the shout's slug, which is needed again below
 	user_slug = userdata.get('slug')
 	with local_session() as session:
 		# c = session.query(Community).all().pop()
 		if email: user = session.query(User).filter(User.email == email).first()
 		if not user and user_slug: user = session.query(User).filter(User.slug == user_slug).first()
 		if not user and userdata:
 			try:
 				user = User.create(**userdata)
 			except sqlalchemy.exc.IntegrityError:
 				# report and propagate: there is no user object to continue with
 				print('[migration] user error: %r' % (userdata,))
 				raise
 			userdata['id'] = user.id
 			userdata['createdAt'] = user.createdAt
 			storage['users']['by_slug'][userdata['slug']] = userdata
 			storage['users']['by_oid'][entry['_id']] = userdata
 	assert user, 'could not get a user'
 	shout_dict['authors'] = [ user, ]
 	try:
 		s = Shout.create(**shout_dict)
 	except sqlalchemy.exc.IntegrityError as e:
 		# most likely the slug already exists: update the stored shout when it differs
 		with local_session() as session:
 			s = session.query(Shout).filter(Shout.slug == shout_dict['slug']).first()
 			bump = False
 			if s:
 				for key in shout_dict:
 					if key in s.__dict__:
 						if s.__dict__[key] != shout_dict[key]:
 							print('[migration] shout already exists, but differs in %s' % key)
 							bump = True
 					else:
 						print('[migration] shout already exists, but lacks %s' % key)
 						bump = True
 				if bump:
 					s.update(shout_dict)
 			else:
 				print('[migration] something went wrong with shout: \n%r' % shout_dict)
 				raise e
 			session.commit()
 	except Exception as e:
 		print(s)
 		raise Exception('[migration] cannot create shout %r' % shout_dict['slug']) from e
 	# shout topics aftermath
 	shout_dict['topics'] = []
 	for tpc in r['topics']:
 		oldslug = tpc
 		newslug = storage['replacements'].get(oldslug, oldslug)
 		if newslug:
 			with local_session() as session:
 				shout_topic_old = session.query(ShoutTopic)\
 					.filter(ShoutTopic.shout == shout_dict['slug'])\
 					.filter(ShoutTopic.topic == oldslug).first()
 				if shout_topic_old:
 					# NOTE(review): the queries above filter on ShoutTopic.topic, yet this
 					# update writes a 'slug' key -- confirm the intended column
 					shout_topic_old.update({ 'slug': newslug })
 				else:
 					shout_topic_new = session.query(ShoutTopic)\
 						.filter(ShoutTopic.shout == shout_dict['slug'])\
 						.filter(ShoutTopic.topic == newslug).first()
 					if not shout_topic_new:
 						# best effort: a failing topic link is reported, not fatal
 						try: ShoutTopic.create(**{ 'shout': shout_dict['slug'], 'topic': newslug })
 						except Exception: print('[migration] shout topic error: ' + newslug)
 				session.commit()
 			if newslug not in shout_dict['topics']:
 				shout_dict['topics'].append(newslug)
 		else:
 			# tpc is the slug string itself
 			print('[migration] ignored topic slug: \n%r' % tpc)
 			# raise Exception
 	# content_item ratings to reactions
 	content_rating = None
 	try:
 		for content_rating in entry.get('ratings') or []:
 			with local_session() as session:
 				rater = session.query(User).filter(User.oid == content_rating['createdBy']).first()
 				# ratings of users that were not migrated are skipped
 				if not rater: continue
 				reaction_dict = {
 					'kind': ReactionKind.LIKE if content_rating['value'] > 0 else ReactionKind.DISLIKE,
 					'createdBy': rater.slug,
 					'shout': shout_dict['slug']
 				}
 				cts = content_rating.get('createdAt')
 				if cts: reaction_dict['createdAt'] = date_parse(cts)
 				reaction = session.query(Reaction).\
 					filter(Reaction.shout == reaction_dict['shout']).\
 					filter(Reaction.createdBy == reaction_dict['createdBy']).\
 					filter(Reaction.kind == reaction_dict['kind']).first()
 				if reaction:
 					# no trailing comma here: the kind must be the enum member, not a 1-tuple
 					reaction_dict['kind'] = ReactionKind.AGREE if content_rating['value'] > 0 else ReactionKind.DISAGREE
 					reaction.update(reaction_dict)
 				else: Reaction.create(**reaction_dict)
 				# shout_dict['ratings'].append(reaction_dict)
 	except Exception as e:
 		print('[migration] content_item.ratings error: \n%r' % content_rating)
 		raise Exception('[migration] content_item.ratings error') from e
 	# shout views
 	ViewedByDay.create( shout = shout_dict['slug'], value = entry.get('views', 1) )
 	# del shout_dict['ratings']
 	shout_dict['oid'] = entry.get('_id')
 	storage['shouts']['by_oid'][entry['_id']] = shout_dict
 	# keyed by the shout's own slug (the variable used here before had been
 	# overwritten with the author's slug)
 	storage['shouts']['by_slug'][shout_dict['slug']] = shout_dict
 	return shout_dict
--- a/migration/tables/replacements.json
+++ b/migration/tables/replacements.json
@ -0,0 +1,768 @@
 {
    "1990-e": "90s",
    "2000-e": "2000s",
    "90-e": "90s",
    "207": "207",
    "kartochki-rubinshteyna": "rubinstein-cards",
    "Georgia": "georgia",
    "Japan": "japan",
    "Sweden": "sweden",
    "abstraktsiya": "abstract",
    "absurdism": "absurdism",
    "acclimatization": "acclimatisation",
    "activism": "activism",
    "adolf-gitler": "adolf-hitler",
    "afrika": "africa",
    "agata-kristi": "agatha-christie",
    "agressiya": "agression",
    "agressivnoe-povedenie": "agression",
    "aktsii": "actions",
    "aktsionizm": "actionism",
    "alber-kamyu": "albert-kamus",
    "albomy": "albums",
    "aleksandr-griboedov": "aleksander-griboedov",
    "aleksandr-pushkin": "aleksander-pushkin",
    "aleksandr-solzhenitsyn": "aleksander-solzhenitsyn",
    "aleksandr-vvedenskiy": "aleksander-vvedensky",
    "aleksey-navalnyy": "alexey-navalny",
    "alfavit": "alphabet",
    "alkogol": "alcohol",
    "alternativa": "alternative",
    "alternative": "alternative",
    "alternativnaya-istoriya": "alternative-history",
    "amerika": "america",
    "anarhizm": "anarchism",
    "anatoliy-mariengof": "anatoly-mariengof",
    "ancient-russia": "ancient-russia",
    "andegraund": "underground",
    "andrey-platonov": "andrey-platonov",
    "andrey-rodionov": "andrey-rodionov",
    "andrey-tarkovskiy": "andrey-tarkovsky",
    "angliyskie-istorii": "english-stories",
    "angliyskiy-yazyk": "english-langugae",
    "animation": "animation",
    "animatsiya": "animation",
    "anime": "anime",
    "anri-volohonskiy": "anri-volohonsky",
    "antifashizm": "anti-faschism",
    "antiquity": "antiquity",
    "antiutopiya": "dystopia",
    "antropology": "antropology",
    "antropotsen": "antropocenus",
    "architecture": "architecture",
    "arheologiya": "archeology",
    "arhetipy": "archetypes",
    "arhiv": "archive",
    "aristokraty": "aristocracy",
    "aristotel": "aristotle",
    "arktika": "arctic",
    "armiya": "army",
    "art": "art",
    "art-is": "art-is",
    "artists": "artists",
    "ateizm": "atheism",
    "audiopoeziya": "audio-poetry",
    "audio-poetry": "audio-poetry",
    "audiospektakl": "audio-spectacles",
    "auktsyon": "auktsyon",
    "avangard": "avantgarde",
    "avtofikshn": "autofiction",
    "avtorskaya-pesnya": "bardsongs",
    "azbuka-immigratsii": "immigration-basics",
    "aziatskiy-kinematograf": "asian-cinema",
    "b-movie": "b-movie",
    "bannye-chteniya": "sauna-reading",
    "bardsongs": "bardsongs",
    "bdsm": "bdsm",
    "belarus": "belarus",
    "belgiya": "belgium",
    "bertold-breht": "berttold-brecht",
    "bezumie": "madness",
    "biography": "biography",
    "biologiya": "biology",
    "bipolyarnoe-rasstroystvo": "bipolar-disorder",
    "bitniki": "beatnics",
    "biznes": "business",
    "blizhniy-vostok": "middle-east",
    "blizost": "closeness",
    "blokada": "blockade",
    "bob-dilan": "bob-dylan",
    "bog": "god",
    "bol": "pain",
    "bolotnoe-delo": "bolotnaya-case",
    "books": "books",
    "boris-eltsin": "boris-eltsin",
    "boris-godunov": "boris-godunov",
    "boris-grebenschikov": "boris-grebenschikov",
    "boris-nemtsov": "boris-nemtsov",
    "boris-pasternak": "boris-pasternak",
    "brak": "marriage",
    "bret-iston-ellis": "bret-iston-ellis",
    "buddizm": "buddhism",
    "bullying": "bullying",
    "bunt": "riot",
    "burning-man": "burning-man",
    "bytie": "being",
    "byurokratiya": "bureaucracy",
    "capitalism": "capitalism",
    "censored-in-russia": "censored-in-russia",
    "ch-rno-beloe": "black-and-white",
    "ch-rnyy-yumor": "black-humour",
    "chapters": "chapters",
    "charity": "charity",
    "chayldfri": "childfree",
    "chechenskaya-voyna": "chechen-war",
    "chechnya": "chechnya",
    "chelovek": "male",
    "chernobyl": "chernobyl",
    "chernyy-yumor": "black-humour",
    "children": "children",
    "china": "china",
    "chinovniki": "bureaucracy",
    "chukotka": "chukotka",
    "chuma": "plague",
    "church": "church",
    "cinema": "cinema",
    "city": "city",
    "civil-position": "civil-position",
    "clips": "clips",
    "collage": "collage",
    "comics": "comics",
    "conspiracy-theory": "conspiracy-theory",
    "contemporary-art": "contemporary-art",
    "contemporary-poetry": "poetry",
    "contemporary-prose": "prose",
    "coronavirus": "coronavirus",
    "corruption": "corruption",
    "creative-writing-school": "creative-writing-school",
    "crime": "crime",
    "criticism": "criticism",
    "critiques": "reviews",
    "culture": "culture",
    "dadaizm": "dadaism",
    "daniel-defo": "daniel-defoe",
    "daniil-harms": "daniil-kharms",
    "dante-aligeri": "dante-alighieri",
    "darkveyv": "darkwave",
    "death": "death",
    "debaty": "debats",
    "delo-seti": "seti-case",
    "democracy": "democracy",
    "demografiya": "demographics",
    "demonstrations": "demonstrations",
    "depression": "depression",
    "derevnya": "village",
    "design": "design",
    "detskie-doma": "orphanages",
    "detstvo": "childhood",
    "digital": "digital",
    "digital-art": "digital-art",
    "directing": "directing",
    "diskurs": "discours",
    "diskurs-1": "discourse",
    "dissidenty": "dissidents",
    "diy": "diy",
    "dmitriy-donskoy": "dmitriy-donskoy",
    "dmitriy-prigov": "dmitriy-prigov",
    "dnevniki": "dairies",
    "documentary": "documentary",
    "dokumenty": "documents",
    "domashnee-nasilie": "home-terror",
    "donald-tramp": "donald-trump",
    "donbass": "donbass",
    "donorstvo": "donation",
    "drama": "drama",
    "dramaturgy": "dramaturgy",
    "drawing": "drawing",
    "drevo-zhizni": "tree-of-life",
    "drugs": "drugs",
    "dzhaz": "jazz",
    "dzhek-keruak": "jack-keruak",
    "dzhim-morrison": "jim-morrison",
    "dzhordzh-romero": "george-romero",
    "dzhordzho-agamben": "giorgio-agamben",
    "ecology": "ecology",
    "economics": "economics",
    "eda": "food",
    "editing": "editing",
    "editorial-statements": "editorial-statements",
    "eduard-limonov": "eduard-limonov",
    "education": "education",
    "egor-letov": "egor-letov",
    "eksperiment": "experiments",
    "eksperimentalnaya-muzyka": "experimental-music",
    "ekspressionizm": "expressionism",
    "ekstremizm": "extremism",
    "ekzistentsializm-1": "existentialism",
    "elections": "elections",
    "electronic": "electronics",
    "electronics": "electronics",
    "elena-glinskaya": "elena-glinskaya",
    "elena-guro": "elena-guro",
    "elizaveta-mnatsakanova": "elizaveta-mnatsakanova",
    "embient": "ambient",
    "emigration": "emigration",
    "emil-dyurkgeym": "emile-durkheim",
    "emotsii": "emotions",
    "empiric": "empiric",
    "epidemiya": "pandemic",
    "erich-von-neff": "erich-von-neff",
    "erotika": "erotics",
    "essay": "essay",
    "estetika": "aestetics",
    "etika": "ethics",
    "etnos": "ethnics",
    "everyday-life": "everyday-life",
    "evgeniy-onegin": "eugene-onegin",
    "evolyutsiya": "evolution",
    "exhibitions": "exhibitions",
    "experience": "experiences",
    "experimental": "experimental",
    "experimental-music": "experimental-music",
    "explanation": "explanation",
    "faktcheking": "fact-checking",
    "falsifikatsii": "falsifications",
    "family": "family",
    "fanfiki": "fan-fiction",
    "fantastika": "sci-fi",
    "fatalizm": "fatalism",
    "fedor-dostoevskiy": "fedor-dostoevsky",
    "fedor-ioannovich": "fedor-ioannovich",
    "feleton": "feuilleton",
    "feminism": "feminism",
    "fenomenologiya": "phenomenology",
    "fentezi": "fantasy",
    "festival": "festival",
    "festival-territoriya": "festival-territory",
    "folk": "folk",
    "folklor": "folklore",
    "fotoreportazh": "photoreports",
    "france": "france",
    "frants-kafka": "franz-kafka",
    "frederik-begbeder": "frederick-begbeder",
    "freedom": "freedom",
    "friendship": "friendship",
    "fsb": "fsb",
    "futbol": "footbool",
    "future": "future",
    "futuristy": "futurists",
    "futurizm": "futurism",
    "galereya": "gallery",
    "gdr": "gdr",
    "gender": "gender",
    "gendernyy-diskurs": "gender",
    "gennadiy-aygi": "gennadiy-aygi",
    "gerhard-rihter": "gerhard-rihter",
    "germaniya": "germany",
    "germenevtika": "hermeneutics",
    "geroi": "heroes",
    "girls": "girls",
    "gkchp": "gkchp",
    "glitch": "glitch",
    "globalizatsiya": "globalisation",
    "gollivud": "hollywood",
    "gonzo": "gonzo",
    "gore-ot-uma": "woe-from-wit",
    "graffiti": "graffiti",
    "graphics": "graphics",
    "gravyura": "engraving",
    "grazhdanskaya-oborona": "grazhdanskaya-oborona",
    "gretsiya": "greece",
    "gulag": "gulag",
    "han-batyy": "khan-batyy",
    "health": "health",
    "himiya": "chemistry",
    "hip-hop": "hip-hop",
    "history": "history",
    "history-of-russia": "history-of-russia",
    "holokost": "holocaust",
    "horeografiya": "choreography",
    "horror": "horror",
    "hospis": "hospice",
    "hristianstvo": "christianity",
    "humans": "humans",
    "humour": "humour",
    "ideologiya": "ideology",
    "idm": "idm",
    "igil": "isis",
    "igor-pomerantsev": "igor-pomerantsev",
    "igra-prestolov": "game-of-throne",
    "igry": "games",
    "iisus-hristos": "jesus-christ",
    "illness": "illness",
    "illustration-history": "illustration-history",
    "illustrations": "illustrations",
    "imazhinizm": "imagism",
    "immanuil-kant": "immanuel-kant",
    "impressionizm": "impressionism",
    "improvizatsiya": "improvisation",
    "indi": "indie",
    "individualizm": "individualism",
    "infografika": "infographics",
    "informatsiya": "information",
    "ingmar-bergman": "ingmar-bergman",
    "inklyuziya": "inclusion",
    "installyatsiya": "installation",
    "internet": "internet",
    "interview": "interview",
    "invalidnost": "disability",
    "investigations": "investigations",
    "iosif-brodskiy": "joseph-brodsky",
    "iosif-stalin": "joseph-stalin",
    "iskusstvennyy-intellekt": "artificial-intelligence",
    "islam": "islam",
    "istoriya-moskvy": "moscow-history",
    "istoriya-teatra": "theatre-history",
    "italiya": "italy",
    "italyanskiy-yazyk": "italian-language",
    "iudaika": "judaica",
    "ivan-groznyy": "ivan-grozny",
    "ivan-iii-gorbatyy": "ivan-iii-gorbaty",
    "ivan-kalita": "ivan-kalita",
    "ivan-krylov": "ivan-krylov",
    "izobreteniya": "inventions",
    "izrail-1": "israel",
    "jazz": "jazz",
    "john-lennon": "john-lennon",
    "journalism": "journalism",
    "justice": "justice",
    "k-pop": "k-pop",
    "kalligrafiya": "calligraphy",
    "karikatura": "caricatures",
    "katrin-nenasheva": "katrin-nenasheva",
    "kavkaz": "caucasus",
    "kazan": "kazan",
    "kiberbezopasnost": "cybersecurity",
    "kinoklub": "cinema-club",
    "kirill-serebrennikov": "kirill-serebrennikov",
    "klassika": "classic",
    "kollektivnoe-bessoznatelnoe": "сollective-unconscious",
    "komediya": "comedy",
    "kommunikatsii": "communications",
    "kommunizm": "communism",
    "kommuny": "communes",
    "kompyuternye-igry": "computer-games",
    "konservatizm": "conservatism",
    "kontrkultura": "counter-culture",
    "kontseptualizm": "conceptualism",
    "korotkometrazhka": "cinema-shorts",
    "kosmos": "cosmos",
    "kraudfanding": "crowdfunding",
    "krizis": "crisis",
    "krov": "blood",
    "krym": "crimea",
    "kulturologiya": "culturology",
    "kulty": "cults",
    "kurdistan": "kurdistan",
    "kurt-kobeyn": "kurt-cobain",
    "kurt-vonnegut": "kurt-vonnegut",
    "kvir": "queer",
    "laboratoriya": "lab",
    "language": "languages",
    "lars-fon-trier": "lars-fon-trier",
    "laws": "laws",
    "lectures": "lectures",
    "leto": "summer",
    "lev-tolstoy": "leo-tolstoy",
    "lgbt": "lgbt",
    "liberalizm": "liberalism",
    "libertarianstvo": "libertarianism",
    "life": "life",
    "likbez": "likbez",
    "lingvistika": "linguistics",
    "lirika": "lirics",
    "literary-studies": "literary-studies",
    "literature": "literature",
    "lo-fi": "lo-fi",
    "love": "love",
    "luzha-goluboy-krovi": "luzha-goluboy-krovi",
    "lyudvig-vitgenshteyn": "ludwig-wittgenstein",
    "lzhedmitriy": "false-dmitry",
    "lzhenauka": "pseudoscience",
    "maks-veber": "max-weber",
    "manifests": "manifests",
    "manipulyatsii-soznaniem": "mind-manipulation",
    "marina-abramovich": "marina-abramovich",
    "marketing": "marketing",
    "marksizm": "marxism",
    "marsel-dyushan": "marchel-duchamp",
    "martin-haydegger": "martin-hidegger",
    "matematika": "maths",
    "vladimir-mayakovskiy": "vladimir-mayakovsky",
    "mayakovskiy": "vladimir-mayakovsky",
    "ekzistentsiya": "existence",
    "media": "media",
    "medicine": "medicine",
    "memuary": "memoirs",
    "menedzhment": "management",
    "merab-mamardashvili": "merab-mamardashvili",
    "mest": "revenge",
    "metamodernizm": "metamodern",
    "metavselennaya": "metaverse",
    "metro": "metro",
    "mifologiya": "mythology",
    "mify": "myth",
    "mihael-haneke": "michael-haneke",
    "mihail-baryshnikov": "mihail-baryshnikov",
    "mihail-bulgakov": "mihail-bulgakov",
    "mikrotonalnaya-muzyka": "mikrotone-muzyka",
    "minimalizm": "minimalism",
    "minkult-privet": "minkult-privet",
    "mir": "world",
    "mirovozzrenie": "mindsets",
    "mishel-fuko": "michel-foucault",
    "mistika": "mystics",
    "mitropolit-makariy": "mitropolit-makariy",
    "mlm": "mlm",
    "moda": "fashion",
    "modernizm": "modernism",
    "mokyumentari": "mockumentary",
    "moloko-plus": "moloko-plus",
    "money": "money",
    "monologs": "monologues",
    "monstratsiya": "monstration",
    "moralnaya-otvetstvennost": "moral-responsibility",
    "more": "sea",
    "moscow": "moscow",
    "moshennichestvo": "frauds",
    "moskovskiy-romanticheskiy-kontseptualizm": "moscow-romantic-conceptualism",
    "moskovskoe-delo": "moscow-case",
    "movies": "movies",
    "mozg": "brain",
    "multiplikatsiya": "animation",
    "music": "music",
    "muzei": "museum",
    "muzey": "museum",
    "muzhchiny": "man",
    "myshlenie": "thinking",
    "nagornyy-karabah": "nagorno-karabakh",
    "natsionalizm": "nationalism",
    "natsionalnaya-ideya": "national-idea",
    "natsizm": "nazism",
    "natyurmort": "nature-morte",
    "nauchpop": "pop-science",
    "nbp": "nbp",
    "nenavist": "hate",
    "neofitsialnaya-literatura": "unofficial-literature",
    "neoklassika": "neoclassic",
    "neprozrachnye-smysly": "hidden-meanings",
    "neravenstvo": "inequality",
    "new-year": "new-year",
    "neyronauka": "neuro-science",
    "neyroseti": "neural-networks",
    "niu-vshe": "hse",
    "nizhniy-novgorod": "nizhny-novgorod",
    "nko": "nonprofits",
    "nlo": "ufo",
    "nobelevskaya-premiya": "nobel-prize",
    "noize-mc": "noize-mc",
    "nonkonformizm": "nonconformism",
    "novaya-drama": "new-drama",
    "novosti": "news",
    "noyz": "noise",
    "oberiu": "oberiu",
    "ocherk": "etudes",
    "ochevidnyy-nuar": "ochevidnyy-nuar",
    "odinochestvo": "loneliness",
    "odna-kniga-odna-istoriya": "one-book-one-story",
    "okrainy": "outskirts",
    "opinions": "opinions",
    "oppozitsiya": "opposition",
    "orhan-pamuk": "orhan-pamuk",
    "ornitologiya": "ornitology",
    "osip-mandelshtam": "osip-mandelshtam",
    "oskar-uayld": "oscar-wilde",
    "osoznanie": "awareness",
    "otnosheniya": "relationship",
    "pablo-pikasso": "pablo-picasso",
    "painting": "painting",
    "paintings": "painting",
    "pamyat": "memory",
    "pandemiya": "pandemic",
    "parizh": "paris",
    "patriotizm": "patriotism",
    "paul-tselan": "paul-tselan",
    "per-burd": "pierre-bourdieu",
    "performance": "performance",
    "peyzazh": "landscape",
    "philology": "philology",
    "philosophy": "philosophy",
    "photo": "photography",
    "photography": "photography",
    "photoprojects": "photoprojects",
    "plakaty": "posters",
    "plastilin": "plasticine",
    "plays": "plays",
    "podrostki": "teenagers",
    "poema": "poem",
    "poems": "poems",
    "poeticheskaya-proza": "poetic-prose",
    "poetry": "poetry",
    "poetry-of-squares": "poetry-of-squares",
    "poetry-slam": "poetry-slam",
    "police": "police",
    "politics": "politics",
    "polsha": "poland",
    "pop-art": "pop-art",
    "pop-culture": "pop-culture",
    "pornografiya": "pornography",
    "portret": "portrait",
    "poslovitsy": "proverbs",
    "post-pank": "post-punk",
    "post-rok": "post-rock",
    "postmodernism": "postmodernism",
    "povest": "novells",
    "povsednevnost": "everyday-life",
    "power": "power",
    "pravo": "right",
    "pravoslavie": "orthodox",
    "pravozaschitniki": "human-rights-activism",
    "prazdnik": "holidays",
    "predatelstvo": "betrayal",
    "predprinimatelstvo": "entrepreneurship",
    "premera": "premier",
    "premiya-oskar": "oscar-prize",
    "pribaltika-1": "baltic",
    "priroda": "nature",
    "prison": "prison",
    "pritcha": "parable",
    "privatnost": "privacy",
    "progress": "progress",
    "projects": "projects",
    "prokrastinatsiya": "procrastination",
    "propaganda": "propaganda",
    "proschenie": "forgiveness",
    "prose": "prose",
    "proshloe": "past",
    "prostitutsiya": "prostitution",
    "prosveschenie": "enlightenment",
    "protests": "protests",
    "psalmy": "psalms",
    "psihoanaliz": "psychoanalysis",
    "psihodeliki": "psychodelics",
    "pskov": "pskov",
    "psychiatry": "psychiatry",
    "psychology": "psychology",
    "punk": "punk",
    "r-b": "rnb",
    "realizm": "realism",
    "redaktura": "editorial",
    "refleksiya": "reflection",
    "reggi": "reggae",
    "religion": "religion",
    "rene-zhirar": "rene-girard",
    "renesanss": "renessance",
    "renovatsiya": "renovation",
    "rep": "rap",
    "reportage": "reportage",
    "repressions": "repressions",
    "research": "research",
    "retroveyv": "retrowave",
    "review": "review",
    "revolution": "revolution",
    "rezo-gabriadze": "rezo-gabriadze",
    "risunki": "painting",
    "roboty": "robots",
    "rock": "rock",
    "roditeli": "parents",
    "romantizm": "romantism",
    "romany": "novell",
    "ronald-reygan": "ronald-reygan",
    "roskomnadzor": "roskomnadzor",
    "rossiyskoe-kino": "russian-cinema",
    "rozhava": "rojava",
    "rpts": "rpts",
    "rus-na-grani-sryva": "rus-na-grani-sryva",
    "russia": "russia",
    "russian-language": "russian-language",
    "russian-literature": "russian-literature",
    "russkiy-mir": "russkiy-mir",
    "salvador-dali": "salvador-dali",
    "samoidentifikatsiya": "self-identity",
    "samoopredelenie": "self-definition",
    "sankt-peterburg": "saint-petersburg",
    "sasha-skochilenko": "sasha-skochilenko",
    "satira": "satiric",
    "saund-art": "sound-art",
    "schaste": "hapiness",
    "school": "school",
    "science": "science",
    "sculpture": "sculpture",
    "second-world-war": "second-world-war",
    "sekond-hend": "second-hand",
    "seksprosvet": "sex-education",
    "sekty": "sects",
    "semiotics": "semiotics",
    "serbiya": "serbia",
    "serialy": "series",
    "sever": "north",
    "severnaya-koreya": "north-korea",
    "sex": "sex",
    "shotlandiya": "scotland",
    "shugeyz": "shoegaze",
    "siloviki": "siloviki",
    "simeon-bekbulatovich": "simeon-bekbulatovich",
    "simvolizm": "simbolism",
    "siriya": "siria",
    "skulptura": "sculpture",
    "slavoy-zhizhek": "slavoj-zizek",
    "smysl": "meaning",
    "sny": "dreams",
    "sobytiya": "events",
    "social": "society",
    "society": "society",
    "sociology": "sociology",
    "sofya-paleolog": "sofya-paleolog",
    "sofya-vitovtovna": "sofya-vitovtovna",
    "soobschestva": "communities",
    "soprotivlenie": "resistence",
    "sotsializm": "socialism",
    "sotsialnaya-filosofiya": "social-philosophy",
    "sotsseti": "social-networks",
    "sotvorenie-tretego-rima": "third-rome",
    "sovremennost": "modernity",
    "spaces": "spaces",
    "spektakl": "spectacles",
    "spetseffekty": "special-fx",
    "spetsoperatsiya": "special-operation",
    "spetssluzhby": "special-services",
    "sport": "sport",
    "srednevekove": "middle-age",
    "state": "state",
    "statistika": "statistics",
    "stendap": "stand-up",
    "stoitsizm": "stoicism",
    "stories": "stories",
    "stoyanie-na-ugre": "stoyanie-na-ugre",
    "strah": "fear",
    "street-art": "street-art",
    "stsenarii": "scenarios",
    "summary": "summary",
    "supergeroi": "superheroes",
    "svetlana-aleksievich": "svetlana-aleksievich",
    "svobodu-ivanu-golunovu": "free-ivan-golunov",
    "syurrealizm": "surrealism",
    "tales": "tales",
    "tanets": "dance",
    "tataro-mongolskoe-igo": "mongol-tatar-yoke",
    "tatuirovki": "tattoo",
    "technology": "technology",
    "televidenie": "tv",
    "telo": "body",
    "telo-kak-iskusstvo": "body-as-art",
    "terrorizm": "terrorism",
    "tests": "tests",
    "text": "texts",
    "the-beatles": "the-beatles",
    "theater": "theater",
    "theory": "theory",
    "tokio": "tokio",
    "torture": "torture",
    "totalitarizm": "totalitarism",
    "traditions": "traditions",
    "tragicomedy": "tragicomedy",
    "transgendernost": "transgender",
    "translation": "translation",
    "transport": "transport",
    "travel": "travel",
    "travma": "trauma",
    "trendy": "trends",
    "tretiy-reyh": "third-reich",
    "triller": "thriller",
    "tsar": "central-african-republic",
    "tsar-edip": "oedipus",
    "tsarevich-dmitriy": "tsarevich-dmitry",
    "tsennosti": "values",
    "tsenzura": "censorship",
    "tseremonii": "ceremonies",
    "turizm": "tourism",
    "tvorchestvo": "creativity",
    "ugnetennyy-zhilischnyy-klass": "oppressed-housing-class",
    "uilyam-shekspir": "william-shakespeare",
    "ukraine": "ukraine",
    "university": "university",
    "urban-studies": "urban-studies",
    "uroki-literatury": "literature-lessons",
    "usa": "usa",
    "ussr": "ussr",
    "utopiya": "utopia",
    "valter-benyamin": "valter-benyamin",
    "varlam-shalamov": "varlam-shalamov",
    "vasiliy-ii-temnyy": "basil-ii-temnyy",
    "vasiliy-iii": "basil-iii",
    "vdnh": "vdnh",
    "vechnost": "ethernety",
    "velikobritaniya": "great-britain",
    "velimir-hlebnikov": "velimir-hlebnikov",
    "velkom-tu-greyt-britn": "welcome-to-great-britain",
    "venedikt-erofeev": "venedikt-erofeev",
    "venetsiya": "veneece",
    "vengriya": "hungary",
    "verlibry": "free-verse",
    "veschi": "things",
    "vessels": "vessels",
    "veterany": "veterans",
    "video": "video",
    "videoart": "videoart",
    "videoklip": "clips",
    "videopoeziya": "video-poetry",
    "viktor-astafev": "viktor-astafev",
    "viktor-pelevin": "viktor-pelevin",
    "vilgelm-rayh": "wilhelm-reich",
    "vinzavod": "vinzavod",
    "violence": "violence",
    "visual-culture": "visual-culture",
    "vizualnaya-poeziya": "visual-poetry",
    "vladimir-lenin": "vladimir-lenin",
    "vladimir-nabokov": "vladimir-nabokov",
    "vladimir-putin": "vladimir-putin",
    "vladimir-sorokin": "vladimir-sorokin",
    "vladimir-voynovich": "vladimir-voynovich",
    "volga": "volga",
    "volontery": "volonteurs",
    "vong-karvay": "wong-karwai",
    "vospominaniya": "memories",
    "vostok": "east",
    "vremya": "time",
    "vudi-allen": "woody-allen",
    "vynuzhdennye-otnosheniya": "forced-relationship",
    "war": "war",
    "war-in-ukraine-images": "war-in-ukrahine-images",
    "women": "women",
    "work": "work",
    "writers": "writers",
    "xx-century": "xx-century",
    "yakob-yordans": "yakob-yordans",
    "yan-vermeer": "yan-vermeer",
    "yanka-dyagileva": "yanka-dyagileva",
    "yaponskaya-literatura": "japan-literature",
    "youth": "youth",
    "yozef-rot": "yozef-rot",
    "yurgen-habermas": "jorgen-habermas",
    "za-liniey-mannergeyma": "behind-mannerheim-line",
    "zahar-prilepin": "zahar-prilepin",
    "zakonodatelstvo": "laws",
    "zakony-mira": "world-laws",
    "zametki": "notes",
    "zhelanie": "wish",
    "konets-vesny": "end-of-spring",
    "zhivotnye": "animals",
    "zhoze-saramago": "jose-saramago",
    "zigmund-freyd": "sigmund-freud",
    "zolotaya-orda": "golden-horde",
    "zombi": "zombie",
    "zombi-simpsony": "zombie-simpsons",
    "rouling": "rowling",
    "diskurs-analiz": "discourse-analytics",
    "menty": "police",
    "ptitsy": "birds",
    "salo": "lard",
    "rasizm": "racism",
    "griby": "mushrooms",
    "politzaklyuchennye": "political-prisoners",
    "molodezh": "youth",
    "blocked-in-russia": "blocked-in-russia",
    "kavarga": "kavarga",
    "galereya-anna-nova": "gallery-anna-nova",
    "derrida": "derrida"
 }
--- a/migration/tables/topics.py
+++ b/migration/tables/topics.py
@ -0,0 +1,28 @@
 from migration.extract import extract_md, html2text
 from orm.base import local_session
 from orm import Topic, Community
 def migrate(entry):
 	''' create a Topic from a legacy entry (or reuse the stored one with the same slug)
 	and return the topic's attributes as a plain dict
 	'''
 	nbsp = '&nbsp;'
 	description = entry.get('description', '').replace(nbsp, ' ')
 	fields = {
 		'slug': entry['slug'],
 		'oid': entry['_id'],
 		'title': entry['title'].replace(nbsp, ' '),
 		'children': [],
 		'community': Community.default_community.slug
 	}
 	fields['body'] = extract_md(html2text(description), entry['_id'])
 	with local_session() as session:
 		topic = session.query(Topic).filter(Topic.slug == fields['slug']).first()
 		if not topic:
 			topic = Topic.create(**fields)
 		# the stored title is replaced only when the incoming one is shorter
 		if len(fields['title']) < len(topic.title):
 			topic.update({'title': fields['title']})
 		# the stored body is replaced only when the incoming one is longer
 		if len(fields['body']) > len(topic.body):
 			topic.update({'body': fields['body']})
 		session.commit()
 	# copy the instance attributes and drop SQLAlchemy's internal state entry
 	snapshot = dict(topic.__dict__)
 	snapshot.pop('_sa_instance_state')
 	return snapshot
--- a/migration/tables/users.py
+++ b/migration/tables/users.py
@ -0,0 +1,106 @@
 import sqlalchemy
 from migration.html2text import html2text
 from orm import User, UserRating
 from dateutil.parser import parse
 from orm.base import local_session
 def migrate(entry):
 	''' build a User from a legacy user entry and return the dict of its fields
 	entry: legacy user dict; reads 'emails', '_id', 'services', 'createdAt' and the
 	optional 'updatedAt', 'wasOnlineAt' and 'profile'; 'subscribedTo' is removed from it in place
 	returns: the dict passed to User.create, extended with the 'id' of the resulting user
 	raises: Exception when creation hits an integrity conflict and no user with the same slug exists
 	'''
 	entry.pop('subscribedTo', None)
 	email = entry['emails'][0]['address']
 	# a missing profile is handled like an empty one: slug and name use their fallbacks
 	profile = entry.get('profile') or {}
 	user_dict = {
 		'oid': entry['_id'],
 		'roles': [], # entry['roles'] # roles by community
 		'ratings': [], # entry['ratings']
 		'username': email,
 		'email': email,
 		'password': entry['services']['password'].get('bcrypt', ''),
 		'createdAt': parse(entry['createdAt']),
 		'emailConfirmed': bool(entry['emails'][0]['verified']),
 		'muted': False, # amnesty
 		'bio': '',
 		'notifications': [],
 		'links': [],
 		'name': 'anonymous'
 	}
 	if 'updatedAt' in entry:
 		user_dict['updatedAt'] = parse(entry['updatedAt'])
 	# the key used to be misspelled as 'wasOnineAt', so this field was never migrated
 	if 'wasOnlineAt' in entry:
 		user_dict['wasOnlineAt'] = parse(entry['wasOnlineAt'])
 	slug = None
 	if profile:
 		slug = profile.get('path')
 		user_dict['bio'] = html2text(profile.get('bio') or '')
 		# userpic: prefer the thumbor asset, then the plain image url, else empty
 		try:
 			user_dict['userpic'] = 'https://assets.discours.io/unsafe/100x/' + profile['thumborId']
 		except KeyError:
 			try:
 				user_dict['userpic'] = profile['image']['url']
 			except KeyError:
 				user_dict['userpic'] = ''
 		# name: first name (or slug, or 'noname'), followed by the last name when present
 		first_name = profile.get('firstName', '')
 		last_name = profile.get('lastName', '')
 		name = first_name or slug or 'noname'
 		if last_name:
 			name = name + ' ' + last_name
 		# a one-character name is replaced by the normalized profile path when there is one
 		if len(name) < 2 and slug:
 			name = slug.lower().replace(' ', '-')
 		user_dict['name'] = name
 		# links
 		for network in ('facebook', 'vkontakte', 'twitter', 'website'):
 			link = profile.get(network, False)
 			if link:
 				user_dict['links'].append(link)
 	# slug fallbacks: last segment of the first link, then the local part of the email
 	if not slug and user_dict['links']:
 		slug = user_dict['links'][0].split('/')[-1]
 	user_dict['slug'] = slug or email.split('@')[0]
 	oid = user_dict['oid']
 	try:
 		user = User.create(**user_dict.copy())
 	except sqlalchemy.exc.IntegrityError:
 		print('[migration] cannot create user ' + user_dict['slug'])
 		with local_session() as session:
 			user = session.query(User).filter(User.slug == user_dict['slug']).first()
 			# check the lookup result before touching it, otherwise a miss raises AttributeError
 			if not user:
 				print('[migration] ERROR: cannot find user ' + user_dict['slug'])
 				raise Exception('cannot find user ' + user_dict['slug'])
 			# NOTE(review): nothing commits this session here, so the new oid may not be
 			# persisted; confirm how local_session handles commit on exit
 			user.oid = oid
 	user_dict['id'] = user.id
 	return user_dict
 def migrate_2stage(entry, id_map):
 	''' second pass over a legacy user entry: store the ratings listed in entry['ratings']
 	entry: legacy user dict; its optional 'ratings' list holds dicts with 'createdBy' and 'value'
 	id_map: mapping of legacy user oid to the migrated user's slug
 	returns: the number of ratings skipped because their rater is not in id_map
 	'''
 	ce = 0
 	for rating_entry in entry.get('ratings', []):
 		rater_slug = id_map.get(rating_entry['createdBy'])
 		if not rater_slug:
 			ce += 1
 			# print(rating_entry)
 			continue
 		author_slug = id_map.get(entry['_id'])
 		value = rating_entry['value']
 		with local_session() as session:
 			try:
 				UserRating.create(value=value, rater=rater_slug, user=author_slug)
 			except sqlalchemy.exc.IntegrityError:
 				# %-formatting keeps the message printable even when author_slug is None
 				print('[migration] cannot create %s`s rate from %s' % (author_slug, rater_slug))
 				# match on both rater and rated user: filtering by rater alone could pick
 				# this rater's rating of a different user and add the value to it
 				old_rating = session.query(UserRating).filter(
 					UserRating.rater == rater_slug,
 					UserRating.user == author_slug
 				).first()
 				if not old_rating:
 					print('[migration] ERROR: cannot find %s`s rate from %s' % (author_slug, rater_slug))
 					continue
 				total = old_rating.value + value
 				print('[migration] concat rating value %d+%d=%d' % (old_rating.value, value, total))
 				old_rating.update({ 'value': total })
 				session.commit()
 			except Exception as e:
 				# best effort: report the failure and carry on with the next rating
 				print(e)
 	return ce
--- a/migration/utils.py
+++ b/migration/utils.py
@ -0,0 +1,9 @@
 from datetime import datetime
 from json import JSONEncoder
 class DateTimeEncoder(JSONEncoder):
     ''' JSON encoder that writes datetime values as their str() form '''
     def default(self, z):
         ''' return str(z) for a datetime, defer to JSONEncoder.default for anything else '''
         if not isinstance(z, datetime):
             return super().default(z)
         return str(z)
		`@ -0,0 +1 @@`
							`__all__ = ["tables", "bson2json", "html2md"]`
		`@ -0,0 +1 @@`
							`__all__ = ["users", "tags", "content_items", "comments"]`