role_id and topic relations fixes
This commit is contained in:
41
migration/README.md
Normal file
41
migration/README.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# discours-migration
|
||||
|
||||
First, put the `data` into this folder.
|
||||
|
||||
## Install
|
||||
|
||||
```sh
|
||||
pipenv install -r requirements.txt
|
||||
```
|
||||
|
||||
## Using
|
||||
|
||||
Put the unpacked mongodump into the `data` folder, then start an interpreter with `pipenv shell && python`.
|
||||
|
||||
|
||||
1. Convert the old BSON data to JSON
|
||||
|
||||
```py
|
||||
import bson2json
|
||||
|
||||
bson2json.json_tables() # creates all the needed data json from bson mongodump
|
||||
```
|
||||
|
||||
2. Migrate users
|
||||
|
||||
```py
|
||||
import json
|
||||
from migrations.users import migrate
|
||||
|
||||
data = json.loads(open('data/users.json').read())
|
||||
newdata = {}
|
||||
|
||||
for u in data:
|
||||
try:
|
||||
newdata[u['_id']] = migrate(u)
|
||||
except:
|
||||
print('FAIL!')
|
||||
print(u)
|
||||
|
||||
|
||||
```
|
1
migration/__init__.py
Normal file
1
migration/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Names exported by `from migration import *`.
__all__ = ["tables", "bson2json", "html2md"]
|
30
migration/bson2json.py
Normal file
30
migration/bson2json.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import datetime
import importlib
import json

import bson

from utils import DateTimeEncoder
|
||||
|
||||
# One entry per mongodump table; every table starts out with its own empty
# list, which json_tables() later replaces with the decoded documents.
data = {
    table: []
    for table in (
        "content_items",
        "content_item_categories",
        "tags",
        "email_subscriptions",
        "users",
        "comments",
    )
}
|
||||
|
||||
def json_tables():
    """Decode every ``data/<table>.bson`` dump and write ``data/<table>.json``.

    For each table named in the module-level ``data`` dict the BSON file is
    read in full, decoded one document at a time, stored back into
    ``data[table]`` and serialized to JSON with ``DateTimeEncoder``.
    """
    print('creating json files at data/')

    for table in data:
        documents = []
        with open('data/' + table + '.bson', 'rb') as bson_file:
            raw = bson_file.read()
        # decode_document returns the offset just past the decoded document,
        # so the loop walks the dump until the buffer is exhausted
        base = 0
        while base < len(raw):
            base, document = bson.decode_document(raw, base)
            documents.append(document)
        # re-assigning an existing key is safe while iterating the dict
        data[table] = documents
        # context manager so the output is flushed and closed deterministically
        with open('data/' + table + '.json', 'w') as json_file:
            json.dump(documents, json_file, cls=DateTimeEncoder)
|
||||
|
166
migration/html2md.py
Normal file
166
migration/html2md.py
Normal file
@@ -0,0 +1,166 @@
|
||||
from html.parser import HTMLParser
|
||||
import os
|
||||
import codecs
|
||||
from typing import Tuple
|
||||
|
||||
|
||||
class Converter(HTMLParser):
    """HTML-to-Markdown converter built on ``html.parser.HTMLParser``.

    Feed HTML with ``feed()`` (and ``close()`` to flush trailing text); the
    generated Markdown accumulates in ``md_file``. Link targets that look
    local (no ``http`` prefix, no ``html`` suffix, no ``@``) and all image
    sources are collected in ``related_data``.
    """

    md_file: str                  # Markdown produced so far
    temp_tag: str                 # most recently opened list kind: 'ul' or 'ol'
    code_box: bool                # True while inside a code block
    div_count: int                # current <div> nesting depth
    code_box_div_num: int         # depth of the <div> that opened the code block
    ol_count: int                 # running item number of the current <ol>
    related_data: list            # collected local links and image sources
    is_link: bool                 # True while inside an <a> element
    link_ref: str                 # href of the current <a>
    ignore_data: bool             # True while text/tags must be skipped
    class_div_count: int          # depth of the last <div> carrying a class
    ignore_div: bool              # True while inside such a class <div>
    table_start: Tuple[int, int]  # getpos() of the opening <table>, None until one is seen

    # Markdown prefix emitted when a heading-like tag opens.
    _HEADING_PREFIX = {'title': '# ', 'h1': '# ', 'h2': '## ', 'h3': '### '}

    def __init__(self):
        super().__init__()
        self.md_file = ''
        self.code_box = False
        self.div_count = 0
        self.code_box_div_num = 0
        self.ol_count = 0
        self.temp_tag = ''
        self.related_data = []
        self.is_link = False
        self.link_ref = ''
        self.ignore_data = False
        self.class_div_count = 0
        self.ignore_div = False
        # no table seen yet; lets handle_endtag tolerate a stray </table>
        self.table_start = None

    def _append_image(self, attrs_dict, alt_name):
        """Record the image source and append its Markdown to ``md_file``.

        Images without a ``src`` are skipped. Inside a link the image is
        wrapped so that it points at the link target.
        """
        img_ref = attrs_dict.get('src')
        if not img_ref:
            return
        self.related_data.append(img_ref)
        image_md = f'![{alt_name}]({img_ref})'
        if self.is_link:
            self.md_file += f'[{image_md}]({self.link_ref})'
        else:
            self.md_file += image_md

    def handle_starttag(self, tag, attrs):
        """Emit the Markdown that corresponds to an opening tag."""
        if self.ignore_data:
            return None
        if tag == 'br':
            self.md_file += ' \n'
        elif tag == 'hr':
            self.md_file += '\n*** \n'
        elif tag in self._HEADING_PREFIX:
            self.md_file += self._HEADING_PREFIX[tag]
        elif tag == 'b' or tag == 'strong':
            self.md_file += '**'
        elif tag == 'ul':
            self.temp_tag = 'ul'
            self.md_file += ' \n'
        elif tag == 'ol':
            self.ol_count = 0
            self.temp_tag = 'ol'
            self.md_file += ' \n'
        elif tag == 'li':
            if self.temp_tag == 'ul':
                self.md_file += '* '
            elif self.temp_tag == 'ol':
                self.ol_count += 1
                self.md_file += f'{self.ol_count}. '
        elif tag == 'div':
            self.div_count += 1
            attrs_dict = dict(attrs)
            # a valueless attribute is reported as None, hence the `or ''`
            if 'codeblock' in (attrs_dict.get('style') or ''):
                self.code_box_div_num = self.div_count
                self.code_box = True
                self.md_file += '```\n'
            elif 'class' in attrs_dict:
                self.class_div_count = self.div_count
                self.ignore_div = True
        elif tag == 'en-codeblock':
            self.code_box = True
            self.md_file += '\n```\n'
        elif tag == 'a':
            self.is_link = True
            self.link_ref = dict(attrs).get('href') or '#'
            is_local = (
                not self.link_ref.startswith('http')
                and not self.link_ref.endswith('html')
                and '@' not in self.link_ref
            )
            if is_local:
                self.related_data.append(self.link_ref)
        elif tag in ('style', 'symbol', 'svg', 'path'):
            self.ignore_data = True
        elif tag == 'img':
            attrs_dict = dict(attrs)
            self._append_image(attrs_dict, attrs_dict.get('alt') or 'Placeholder')
        elif tag == 'table':
            # tables are copied through as raw HTML by handle_endtag
            self.ignore_data = True
            self.table_start = self.getpos()

    def get_rawdata(self, start, stop, offset):
        """Return ``rawdata[start:stop]`` measured within line ``offset``.

        ``offset`` is a 1-based line number: ``offset - 1`` leading lines of
        the parser's raw buffer are dropped, then the remainder is sliced
        with the character offsets ``start`` and ``stop``.
        """
        remainder = self.rawdata.split('\n', max(offset - 1, 0))[-1]
        return remainder[start:stop]

    def handle_endtag(self, tag):
        """Emit the Markdown that corresponds to a closing tag."""
        if tag == 'b' or tag == 'strong':
            self.md_file += '** \n'
        elif tag == 'div':
            if self.code_box and self.code_box_div_num == self.div_count:
                self.code_box = False
                self.md_file += '```\n'
            elif self.ignore_div and self.class_div_count == self.div_count:
                self.ignore_div = False
            else:
                self.md_file += ' \n'
            self.div_count -= 1
        elif tag == 'en-codeblock':
            self.code_box = False
            self.md_file += '```\n'
        elif tag == 'a':
            self.is_link = False
        elif tag in ('style', 'symbol', 'svg'):
            self.ignore_data = False
        elif tag == 'li':
            self.md_file += ' \n'
        elif tag == 'table':
            if self.table_start is None:
                # closing tag without a recorded opening one: nothing to copy
                return
            # getpos() yields (line number, column offset)
            line_no, col_stop = self.getpos()
            # extend the slice over the closing tag itself: '</' + tag + '>'
            col_stop = col_stop + len(tag) + 3
            _, col_start = self.table_start
            raw_data = self.get_rawdata(col_start, col_stop, line_no)
            self.md_file += '\n' + raw_data
            self.ignore_data = False

    def handle_startendtag(self, tag, attrs):
        """Emit the Markdown for a self-closing tag such as ``<br/>``."""
        if tag == 'br':
            self.md_file += ' \n'
        elif tag == 'hr':
            self.md_file += '\n*** \n'
        elif tag == 'img':
            attr_dict = dict(attrs)
            # prefer the uploaded file name, then the alt text
            name = (
                attr_dict.get('data-filename')
                or attr_dict.get('alt')
                or 'Placeholder'
            )
            self._append_image(attr_dict, name)

    def handle_data(self, data):
        """Append text content, rendering it as a link while inside ``<a>``."""
        if self.is_link:
            self.md_file += f'[{data}]({self.link_ref})'
        elif self.ignore_data:
            pass
        else:
            self.md_file += data
|
1
migration/tables/__init__.py
Normal file
1
migration/tables/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Names exported by `from migration.tables import *`; only `users` is listed.
__all__ = ["users"]
|
36
migration/tables/comments.py
Normal file
36
migration/tables/comments.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from html2md import Converter
|
||||
import datetime
|
||||
|
||||
# Module-level HTML-to-Markdown converter instance.
markdown = Converter()
|
||||
|
||||
def migrate(entry):
    '''
    Convert an old comment record into the fields of a new Shout.

    # is comment
    type Shout {
        org: String!
        slug: String!
        author: Int!
        body: String!
        createdAt: DateTime!
        updatedAt: DateTime!
        deletedAt: DateTime
        deletedBy: Int
        rating: Int
        published: DateTime # if there is no published field - it is not published
        replyTo: String # another shout
        tags: [String] # actual values
        topics: [String] # topic-slugs
        title: String
        versionOf: String
        visibleForRoles: [String] # role ids are strings
        visibleForUsers: [Int]
    }

    The HTML ``body`` is converted to Markdown; only the fields returned
    below are filled in so far.
    '''
    # TODO: implement comments migration
    # A fresh converter per entry: Converter accumulates its output in
    # md_file, so a shared instance would leak earlier bodies into later ones.
    converter = Converter()
    converter.feed(entry['body'])
    converter.close()  # flush text buffered after the last tag
    return {
        'org': 'discours.io',
        'slug': entry['slug'],
        'createdAt': entry['createdAt'],
        'body': converter.md_file,
        # NOTE(review): the source key was left empty (entry['']); 'replyTo'
        # is a guess mirroring the target field - confirm against the old schema
        'replyTo': entry.get('replyTo')
    }
|
19
migration/tables/content_item_categories.py
Normal file
19
migration/tables/content_item_categories.py
Normal file
@@ -0,0 +1,19 @@
|
||||
def migrate(entry):
    '''
    Convert an old content item category into a Topic dict.

    type Topic {
        slug: String! # ID
        createdBy: Int! # User
        createdAt: DateTime!
        value: String
        parents: [String] # NOTE: topic can have parent topics
        children: [String] # and children
    }

    The topic value is the lower-cased category title; parents and children
    start out empty.
    '''
    return {
        'slug': entry['slug'],
        'createdBy': entry['createdBy'], # NOTE: uses an old user id
        'createdAt': entry['createdAt'],
        'value': entry['title'].lower(),
        'parents': [],
        'children': []
    }
|
86
migration/tables/content_items.py
Normal file
86
migration/tables/content_items.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from migration.html2md import Converter
|
||||
from dateutil.parser import parse
|
||||
from os.path import abspath
|
||||
import json
|
||||
from orm import Shout
|
||||
|
||||
# Lookup table loaded from migration/data/users.dict.json; migrate() indexes it
# by an entry's createdBy value and reads the 'id' field of the match.
with open(abspath('migration/data/users.dict.json')) as users_file:
    users_dict = json.load(users_file)
# NOTE(review): presumably a placeholder author for content created by '0' - confirm
users_dict['0'] = {'id': 99999 }
|
||||
|
||||
# Module-level HTML-to-Markdown converter instance.
# NOTE(review): migrate() in this module never references it - confirm whether
# body HTML is meant to be converted.
markdown = Converter()
|
||||
|
||||
# Maps the old content item `type` to the `layout` value stored on the result.
type2layout = dict(
    Article='article',
    Literature='prose',
    Music='music',
    Video='video',
    Image='image',
)
|
||||
|
||||
def _extract_slug(entry):
    '''Return the entry's slug, falling back to its first usable friendly slug.'''
    slug = entry.get('slug')
    if slug:
        return slug
    friendly = entry.get('friendlySlugs')
    # both shapes were addressed by the old code: {'slug': [{'slug': ...}]}
    # and [{'slug': ...}]
    if isinstance(friendly, dict):
        candidates = friendly.get('slug') or []
    else:
        candidates = friendly or []
    for candidate in candidates:
        if candidate.get('slug'):
            return candidate['slug']
    return None


def migrate(entry):
    '''
    Convert an old content item into a Shout, persist it and return its fields.

    type Shout {
        org_id: Int!
        slug: String!
        author: Int!
        body: String!
        createdAt: DateTime!
        updatedAt: DateTime!
        deletedAt: DateTime
        deletedBy: Int
        rating: Int
        ratings: [Rating]
        published: Bool!
        publishedAt: DateTime # if there is no published field - it is not published
        replyTo: String # another shout
        tags: [String] # actual values
        topics: [String] # topic-slugs, order has matter
        title: String
        versionOf: String
        visibleForRoles: [String] # role ids are strings
        visibleForUsers: [Int]
        views: Int
    }

    Side effects: writes the body to ``migration/content/<slug>.md`` and
    creates the record through ``Shout.create``.

    Returns the dict passed to ``Shout.create`` plus the new ``id``.
    Raises ValueError when no slug can be determined, and KeyError when a
    required field, content type or author id is unknown.
    '''
    r = {
        'org_id': 0,
        'layout': type2layout[entry['type']],
        'title': entry['title'],
        'authors': [ users_dict[entry['createdBy']]['id'], ],
        'topics': [],
        'published': entry['published'],
        'views': entry['views'],
        'rating': entry['rating'],
        'ratings': []
    }

    r['slug'] = _extract_slug(entry)
    if not r['slug']:
        # the slug names the output file and is mandatory on Shout
        raise ValueError('content item has no slug')

    if entry.get('image') is not None:
        r['cover'] = entry['image']['url']
    elif entry.get('thumborId') is not None:
        r['cover'] = 'https://discours.io/' + entry['thumborId']

    # timestamps are copied only when the old record carries them
    for stamp in ('publishedAt', 'createdAt', 'updatedAt'):
        if entry.get(stamp) is not None:
            r[stamp] = entry[stamp]

    kind = entry.get('type')
    if kind == 'Literature':
        r['body'] = entry['media'][0]['literatureBody']
    elif kind == 'Video':
        r['body'] = '<ShoutVideo src=\"' + entry['media'][0]['youtubeId'] + '\" />'
    elif kind == 'Music':
        r['body'] = '<ShoutMusic media={\"' + json.dumps(entry['media']) +'\"} />'
    else:
        # NOTE(review): 'Image' used to assign r['body'] to itself, which
        # raised KeyError; it now shares the title-only fallback - confirm
        # the intended image body
        r['body'] = '## ' + r['title']

    # TODO: compile md with graymatter
    mdfile = r['body']
    with open('migration/content/' + r['slug'] + '.md', 'w', encoding='utf-8') as md:
        md.write(mdfile)

    shout = Shout.create(**r)
    # attribute access, matching how users.py reads the id of a created User
    r['id'] = shout.id
    return r
|
2
migration/tables/email_subscriptions.py
Normal file
2
migration/tables/email_subscriptions.py
Normal file
@@ -0,0 +1,2 @@
|
||||
def migrate(entry):
    '''Return the email subscription entry unchanged (no conversion is applied).'''
    migrated = entry
    return migrated
|
20
migration/tables/tags.py
Normal file
20
migration/tables/tags.py
Normal file
@@ -0,0 +1,20 @@
|
||||
def migrate(entry):
    '''
    Convert an old tag into a Topic dict.

    type Topic {
        slug: String! # ID
        createdBy: Int! # User
        createdAt: DateTime!
        value: String
        parents: [String] # NOTE: topic can have parent topics
        children: [String] # and children
    }

    The topic value is the lower-cased tag value; parents and children start
    out empty.
    '''
    # The old id is passed through unchanged, as content_item_categories
    # does: no old-to-new user id lookup is defined in this module.
    return {
        'slug': entry['slug'],
        'createdBy': entry['createdBy'], # NOTE: uses an old user id
        'createdAt': entry['createdAt'],
        'value': entry['value'].lower(),
        'parents': [],
        'children': []
    }
|
79
migration/tables/users.py
Normal file
79
migration/tables/users.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from orm import User
|
||||
from dateutil.parser import parse
|
||||
|
||||
# NOTE(review): not referenced anywhere else in this module - presumably a
# leftover; confirm before removing.
counter = 0
|
||||
|
||||
def migrate(entry):
    '''
    Convert an old user record into the new User fields and persist it.

    type User {
        username: String! # email
        createdAt: DateTime!
        email: String
        password: String
        oauth: String # provider:token
        viewname: String # to display
        userpic: String
        links: [String]
        emailConfirmed: Boolean # should contain all emails too
        id: Int!
        muted: Boolean
        rating: Int
        roles: [Role]
        updatedAt: DateTime
        wasOnlineAt: DateTime
        ratings: [Rating]
        slug: String
        bio: String
        notifications: [Int]
    }

    The slug is taken from the profile path, then from the last segment of
    the first profile link, then from the local part of the email.

    Returns the dict passed to ``User.create`` plus ``id`` (from the created
    user) and ``old_id`` (the old ``_id``).
    Raises the original error after printing the profile when a required
    field is missing or malformed.
    '''
    res = {}
    try:
        res['old_id'] = entry['_id']
        res['password'] = entry['services']['password'].get('bcrypt', '')
        res['username'] = entry['emails'][0]['address']
        res['email'] = res['username']
        res['wasOnlineAt'] = parse(entry.get('loggedInAt', entry['createdAt']))
        res['emailConfirmed'] = entry['emails'][0]['verified']
        res['createdAt'] = parse(entry['createdAt'])
        res['rating'] = entry['rating'] # number
        res['roles'] = [] # entry['roles'] # roles without org is for discours.io
        res['ratings'] = [] # entry['ratings']
        res['notifications'] = []
        res['links'] = []
        res['muted'] = False
        res['viewname'] = 'anonymous'
        # always present, so the fallbacks below also work without a profile
        res['slug'] = None

        profile = entry.get('profile')
        if profile:
            res['slug'] = profile.get('path')
            res['userpic'] = profile.get('image', {'url': ''}).get('url', '')
            viewname = profile.get('firstName', '') + ' ' + profile.get('lastName', '')
            if len(viewname) < 2:
                # no usable name: fall back to the profile path when there is one
                viewname = profile.get('path') or res['viewname']
            res['viewname'] = viewname
            for network in ('facebook', 'vkontakte', 'twitter', 'website'):
                link = profile.get(network, False)
                if link:
                    res['links'].append(link)

        if not res['slug'] and res['links']:
            res['slug'] = res['links'][0].split('/')[-1]
        if not res['slug']:
            res['slug'] = res['email'].split('@')[0]
    except Exception:
        # .get() so that a missing profile cannot mask the real error
        print(entry.get('profile'))
        raise
    else:
        # old_id is not a User field: keep it out of the create() call
        old = res.pop('old_id')
        user = User.create(**res)
        res['id'] = user.id
        res['old_id'] = old
        return res
|
9
migration/utils.py
Normal file
9
migration/utils.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from datetime import datetime
|
||||
from json import JSONEncoder
|
||||
|
||||
class DateTimeEncoder(JSONEncoder):
    """JSON encoder that serializes ``datetime`` objects as ``str(value)``."""

    def default(self, z):
        """Return ``str(z)`` for datetimes; defer everything else to the base class."""
        if not isinstance(z, datetime):
            return super().default(z)
        return str(z)
|
Reference in New Issue
Block a user