comments migration wip

This commit is contained in:
Untone 2021-10-12 22:38:12 +03:00
parent 816a90f656
commit 80f4fa1d08
6 changed files with 147 additions and 103 deletions

View File

@ -26,8 +26,8 @@ def extract_images(article):
article['old_id'] + str(i) + '.' + ext
img = match.group(4)
if img not in images:
open('..' + link, 'wb').write(base64.b64decode(img))
images.append(img)
open('..' + link, 'wb').write(base64.b64decode(img))
images.append(img)
body = body.replace(match.group(2), link)
print(link)
article['body'] = body
@ -55,13 +55,12 @@ def users():
counter += 1
export_list = sorted(export_data.items(),
key=lambda item: item[1]['rating'])[-10:]
open('migration/data/users.dict.json',
'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id
open('migration/data/users.dict.json', 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id
open('../src/data/authors.json', 'w').write(json.dumps(dict(export_list),
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print(str(len(newdata.items())) + ' user accounts were migrated')
print(str(len(export_list)) + ' authors were exported')
@ -96,15 +95,14 @@ def topics():
export_list = sorted(new_data.items(), key=lambda item: str(
item[1]['createdAt']))
open('migration/data/topics.dict.json',
'w').write(json.dumps(old_data, cls=DateTimeEncoder))
'w').write(json.dumps(old_data, cls=DateTimeEncoder))
open('../src/data/topics.json', 'w').write(json.dumps(dict(export_list),
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print(str(counter) + ' from ' + str(len(cat_data)) +
#' tags and ' + str(len(tag_data)) +
' cats were migrated')
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
print(str(counter) + ' from ' + str(len(cat_data)) + ' cats were migrated')
#' tags and ' + str(len(tag_data)) +
print(str(len(export_list)) + ' topics were exported')
@ -114,7 +112,7 @@ def shouts():
counter = 0
discours_author = 0
content_data = json.loads(open('migration/data/content_items.json').read())
# content_dict = { x['_id']:x for x in content_data }
content_dict = { x['_id']:x for x in content_data }
newdata = {}
print(str(len(content_data)) + ' entries loaded. now migrating...')
errored = []
@ -129,18 +127,18 @@ def shouts():
if author == 'discours':
discours_author += 1
open('./shouts.id.log', 'a').write(line + '\n')
except Exception:
except Exception as e:
print(entry['_id'])
errored.append(entry)
raise Exception(" error")
raise e
try:
limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data)
except ValueError:
limit = len(content_data)
open('migration/data/shouts.dict.json',
'w').write(json.dumps(newdata, cls=DateTimeEncoder))
'w').write(json.dumps(newdata, cls=DateTimeEncoder))
print(str(counter) + '/' + str(len(content_data)) +
' content items were migrated')
' content items were migrated')
print(str(discours_author) + ' from them by @discours')
def comments():
@ -156,13 +154,13 @@ def comments():
print(str(len(export_articles.items())) + ' articles were exported')
export_comments = {}
c = 0
for article in export_articles:
print(article['slug'])
print( comments_by_post.get(article['slug'], '') )
print( export_comments[article['slug']] ) # = comments_by_post.get(article['slug'])
c += len(export_comments[article['slug']])
print(str(len(export_comments.items())) + ' articles with comments')
open('../src/data/coments.json', 'w').write(json.dumps(dict(export_comments),
for slug, article in export_articles.items():
comments = comments_by_post.get(slug, [])
if len(comments) > 0:
export_comments[slug] = comments
c += len(comments)
print(str(len(export_comments.items())) + ' after adding those having comments')
open('../src/data/comments.json', 'w').write(json.dumps(dict(export_comments),
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
@ -192,7 +190,7 @@ def export_shouts(limit):
for (slug, article) in export_list:
if article['layout'] == 'article':
for author in article['authors']:
export_authors[author['slug']] = users_slug[author['slug']]
export_authors[author['slug']] = users_slug[author['slug']]
export_clean[article['slug']] = extract_images(article)
metadata = get_metadata(article)
content = frontmatter.dumps(frontmatter.Post(article['body'], **metadata))
@ -223,23 +221,28 @@ def export_slug(slug):
print(str(len(exported_authors.items())) + ' authors were exported before')
exported_articles = json.loads(open('../src/data/articles.json').read())
print(str(len(exported_articles.items())) + ' articles were exported before')
shout = shouts_dict.get(slug, None)
author = users_dict.get(shout['authors'][0]['slug'], None)
exported_authors.update({shout['authors'][0]['slug']: author})
exported_articles.update({shout['slug']: shout})
print(shout)
open('../src/data/articles.json', 'w').write(json.dumps(exported_articles,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
open('../src/data/authors.json', 'w').write(json.dumps(exported_authors,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
shout = shouts_dict.get(slug, False)
if shout:
author = users_dict.get(shout['authors'][0]['slug'], None)
exported_authors.update({shout['authors'][0]['slug']: author})
exported_articles.update({shout['slug']: shout})
print(shout)
open('../src/data/articles.json', 'w').write(json.dumps(exported_articles,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
open('../src/data/authors.json', 'w').write(json.dumps(exported_authors,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
else:
print('no old id error!')
print(str(len(shouts_dict)) + ' shouts were migrated')
print(slug)
comments()
print('exported.')
print('finished.')
if __name__ == '__main__':

View File

@ -1,7 +1,7 @@
import datetime
from dateutil.parser import parse as date_parse
import json
from os.path import abspath
from orm import Shout
from orm import Shout, Comment, CommentRating, User
from orm.base import local_session
from migration.html2text import html2text
@ -46,14 +46,32 @@ def migrate(entry):
'''
with local_session() as session:
shout_id = session.query(Shout).filter(Shout.old_id == entry['_id']).first()
return {
author_dict = users_dict[entry['createdBy']]
print(author_dict)
author_id = author_dict['id']
comment_dict = {
'old_id': entry['_id'],
'old_thread': entry['thread'],
'createdBy': users_dict[entry['createdBy']],
'createdAt': entry['createdAt'],
'author': author_id,
'createdAt': date_parse(entry['createdAt']),
'body': html2text(entry['body']),
'shout': shout_id,
'rating': entry['rating'],
'ratings': [] # TODO: ratings in comments
'shout': shout_id
}
return None
if 'rating' in entry:
comment_dict['rating'] = entry['rating']
if 'deleted' in entry:
comment_dict['deleted'] = entry['deleted']
if 'thread' in entry:
comment_dict['old_thread'] = entry['thread']
print(entry.keys())
comment = Comment.create(**comment_dict)
for comment_rating_old in entry.get('ratings',[]):
rater_id = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first()
comment_rating_dict = {
'value': cr['value'],
'createdBy': rater_id,
'createdAt': date_parse(comment_rating_old['createdAt']) or ts
}
comment_rating = CommentRating.create(**comment_rating_dict)
comment['ratings'].append(comment_rating)
return comment

View File

@ -1,20 +1,28 @@
from dateutil.parser import parse
from orm import User
from os.path import abspath
import frontmatter
import json
from orm import Shout
from orm import Shout, Comment, Topic, ShoutRating, User #, TODO: CommentRating
from bs4 import BeautifulSoup
from migration.html2text import html2text
from migration.tables.comments import migrate as migrateComment
from transliterate import translit
from datetime import datetime
from sqlalchemy.exc import IntegrityError
from orm.base import local_session
comments_data = json.loads(open(abspath('migration/data/comments.json')).read())
comments_dict = { x['_id']: x for x in comments_data }
users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
print(str(len(users_dict.items())) + ' users loaded')
topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
print(str(len(topics_dict.items())) + ' topics loaded')
comments_data = json.loads(open(abspath('migration/data/comments.json')).read())
print(str(len(comments_data)) + ' comments loaded')
comments_by_post = {}
for comment in comments_data:
p = comment['contentItem']
comments_by_post[p] = comments_by_post.get(p, [])
comments_by_post[p].append(comment)
users_dict['0'] = {
'id': 9999999,
'slug': 'discours',
@ -40,6 +48,7 @@ def get_metadata(r):
metadata['authors'] = r.get('authors')
metadata['createdAt'] = r.get('createdAt', ts)
metadata['layout'] = r['layout']
metadata['topics'] = r['topics']
if r.get('cover', False):
metadata['cover'] = r.get('cover')
return metadata
@ -79,7 +88,7 @@ def migrate(entry):
'views': entry.get('views', 0),
'rating': entry.get('rating', 0),
'ratings': [],
'comments': entry.get('comments', []),
'comments': [],
'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')
}
r['slug'] = entry.get('slug', '')
@ -112,8 +121,7 @@ def migrate(entry):
else:
body_html = str(BeautifulSoup(
body_orig, features="html.parser"))
r['body'] = html2text(body_html).replace('****', '**')
r['old_id'] = entry.get('_id')
r['body'] = html2text(body_html)
else:
print(r['slug'] + ': literature has no media')
elif entry.get('type') == 'Video':
@ -134,9 +142,9 @@ def migrate(entry):
if r.get('body') is None:
body_orig = entry.get('body', '')
body_html = str(BeautifulSoup(body_orig, features="html.parser"))
r['body'] = html2text(body_html).replace('****', '**')
r['old_id'] = entry.get('_id')
body = r.get('body')
r['body'] = html2text(body_html)
body = r.get('body', '')
r['old_id'] = entry.get('_id')
user = None
try:
userdata = users_dict.get(entry['createdBy'], users_dict['0'])
@ -167,7 +175,7 @@ def migrate(entry):
User.slug == authordata['slug']).first()
slug = user['slug']
name = user['name']
userpic = user.userpic
userpic = user['userpic']
else:
# no application, no author!
slug = 'discours'
@ -204,42 +212,54 @@ def migrate(entry):
shout_dict['publishedAt'] = ts
del shout_dict['published']
shout_dict['comments'] = []
for cid in r['comments']:
comment = comments_dict[cid]
comment_ratings = []
for cr in comment['ratings']:
comment_ratings.append({
'value': cr['value'],
'createdBy': users_dict[cr['createdBy']],
'createdAt': cr['createdAt'] or ts})
shout_dict['comments'].append({
'old_id': comment['_id'],
'old_thread': comment['thread'], # TODO: old_thread to replyTo logix
'createdBy': users_dict[comment['createdBy']],
'createdAt': comment['createdAt'] or ts,
'body': html2text(comment['body']),
'shout': shout_dict['old_id'],
'rating': comment['rating'],
'ratings': comment_ratings
})
# shout comments
if entry.get('commentedAt', False):
try:
old_comments = comments_by_post.get(shout_dict['old_id'], [])
if len(old_comments) > 0:
shout_dict['comments'] = []
shout_dict['ratings'] = []
for rating in r['ratings']:
shout_dict['ratings'].append({
'value': rating['value'],
'createdBy': users_dict[rating['createdBy']],
'createdAt': r['createdAt'] or ts})
# migrate comments
for entry in old_comments:
comment = migrateComment(entry)
shout_dict['comments'].append(comment)
except KeyError:
print(shout_dict.keys())
raise 'error'
try:
del shout_dict['views'] # FIXME
del shout_dict['rating'] # FIXME
del shout_dict['ratings'] # FIXME
# del shout_dict['comments']
s = Shout.create(**shout_dict) # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
topic_slugs = shout_dict['topics']
del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
del shout_dict['views'] # FIXME: TypeError: 'views' is an invalid keyword argument for Shout
del shout_dict['rating'] # FIXME: TypeError: 'rating' is an invalid keyword argument for Shout
del shout_dict['ratings']
s = Shout.create(**shout_dict)
r['id'] = s.id
if len(entry.get('ratings', [])) > 0:
# TODO: adding shout ratings
'''
shout_dict['ratings'] = []
for shout_rating_old in entry['ratings']:
shout_rating = ShoutRating.create(
rater_id = users_dict[shout_rating_old['createdBy']]['id'],
shout_id = s.id,
value = shout_rating_old['value']
)
shout.ratings.append(shout_rating.id)
'''
for topic_slug in topic_slugs:
topic_dict = topics_dict.get(topic_slug)
if topic_dict:
topic = Topic.create(**topic_dict)
shout.topics = [ topic, ]
shout.save()
except Exception as e:
pass # raise e
r['error'] = 'db error'
# pass
raise e
except Exception as e:
if not r['body']: r['body'] = 'body moved'
raise e

View File

@ -7,9 +7,9 @@ from orm.notification import Notification
from orm.shout import Shout, ShoutAuthor, ShoutTopic, ShoutRating, ShoutViewByDay,\
ShoutRatingStorage, ShoutViewStorage
from orm.base import Base, engine, local_session
from orm.comment import Comment
from orm.comment import Comment, CommentRating
__all__ = ["User", "Role", "Operation", "Permission", "Message", "Shout", "Topic", "Notification"]
__all__ = ["User", "Role", "Operation", "Permission", "Message", "Shout", "Topic", "Notification", "ShoutRating", "Comment", "CommentRating"]
Base.metadata.create_all(engine)
Operation.init_table()

View File

@ -1,7 +1,7 @@
from typing import List
from datetime import datetime
from sqlalchemy import Column, Integer, String, ForeignKey, DateTime
from sqlalchemy import Column, Integer, String, ForeignKey, DateTime, Boolean
from sqlalchemy.orm import relationship
from orm.base import Base
@ -19,14 +19,16 @@ class Comment(Base):
__tablename__ = 'comment'
author: int = Column(ForeignKey("user.id"), nullable=False, comment="Sender")
body: str = Column(String, nullable=False, comment="Body")
body: str = Column(String, nullable=False, comment="Comment Body")
createdAt = Column(DateTime, nullable=False, default = datetime.now, comment="Created at")
updatedAt = Column(DateTime, nullable=True, comment="Updated at")
deletedAt = Column(DateTime, nullable=True, comment="Deleted at")
deletedBy = Column(ForeignKey("user.id"), nullable=True, comment="Deleted by")
shout: int = Column(ForeignKey("shout.id"), nullable=True, comment="Shout ID")
rating: int = Column(Integer, nullable=True, comment="Comment Rating")
ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id)
old_id: str = Column(String, nullable = True)
deleted: bool = Column(Boolean, nullable = True)
# TODO: work in progress, update this code

View File

@ -58,6 +58,7 @@ class User(Base):
ratings = relationship(UserRatings, foreign_keys=UserRatings.user_id)
roles = relationship(lambda: Role, secondary=UserRoles)
topics = relationship(lambda: Topic, secondary=UserTopics)
old_id: str = Column(String, nullable = True)
@classmethod
def get_permission(cls, user_id):