export separated
This commit is contained in:
parent
fe28c3918c
commit
1714a60e99
40
migrate.py
40
migrate.py
|
@ -114,6 +114,7 @@ def shouts():
|
||||||
counter = 0
|
counter = 0
|
||||||
discours_author = 0
|
discours_author = 0
|
||||||
content_data = json.loads(open('migration/data/content_items.json').read())
|
content_data = json.loads(open('migration/data/content_items.json').read())
|
||||||
|
content_dict = { x['_id']:x for x in content_data }
|
||||||
newdata = {}
|
newdata = {}
|
||||||
print(str(len(content_data)) + ' entries loaded. now migrating...')
|
print(str(len(content_data)) + ' entries loaded. now migrating...')
|
||||||
errored = []
|
errored = []
|
||||||
|
@ -125,7 +126,7 @@ def shouts():
|
||||||
line = str(counter+1) + ': ' + shout['slug'] + " @" + str(author)
|
line = str(counter+1) + ': ' + shout['slug'] + " @" + str(author)
|
||||||
print(line)
|
print(line)
|
||||||
counter += 1
|
counter += 1
|
||||||
if author == 'discours.io':
|
if author == 'discours':
|
||||||
discours_author += 1
|
discours_author += 1
|
||||||
open('./shouts.id.log', 'a').write(line + '\n')
|
open('./shouts.id.log', 'a').write(line + '\n')
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -136,25 +137,35 @@ def shouts():
|
||||||
limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data)
|
limit = int(sys.argv[2]) if len(sys.argv) > 2 else len(content_data)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
limit = len(content_data)
|
limit = len(content_data)
|
||||||
export_list = [i for i in newdata.items() if i[1]['layout'] == 'article' and i[1]['published']]
|
|
||||||
export_list = sorted(export_list, key=lambda item: item[1]['createdAt'] or OLD_DATE, reverse=True)[:limit]
|
|
||||||
export_clean = {}
|
|
||||||
for (slug, a) in export_list:
|
|
||||||
export_clean[a['slug']] = extract_images(a)
|
|
||||||
metadata = get_metadata(a)
|
|
||||||
content = frontmatter.dumps(frontmatter.Post(a['body'], **metadata))
|
|
||||||
open('../content/discours.io/'+a['slug']+'.md', 'w').write(content)
|
|
||||||
open('migration/data/shouts.dict.json',
|
open('migration/data/shouts.dict.json',
|
||||||
'w').write(json.dumps(newdata, cls=DateTimeEncoder))
|
'w').write(json.dumps(newdata, cls=DateTimeEncoder))
|
||||||
|
print(str(counter) + '/' + str(len(content_data)) +
|
||||||
|
' content items were migrated')
|
||||||
|
print(str(discours_author) + ' from them by @discours')
|
||||||
|
|
||||||
|
def export_shouts(limit):
|
||||||
|
print('reading json...')
|
||||||
|
newdata = json.loads(open('migration/data/shouts.dict.json', 'r').read())
|
||||||
|
print(str(len(newdata.keys())) + ' loaded')
|
||||||
|
export_list = [i for i in newdata.items() if i[1]['layout'] == 'article' and i[1]['published']]
|
||||||
|
export_list = sorted(export_list, key=lambda item: item[1]['createdAt'] or OLD_DATE, reverse=True)
|
||||||
|
print(str(len(export_list)) + ' filtered')
|
||||||
|
export_list = export_list[:limit or len(export_list)]
|
||||||
|
export_clean = {}
|
||||||
|
for (slug, article) in export_list:
|
||||||
|
if article['layout'] == 'article':
|
||||||
|
export_clean[article['slug']] = extract_images(article)
|
||||||
|
metadata = get_metadata(article)
|
||||||
|
content = frontmatter.dumps(frontmatter.Post(article['body'], **metadata))
|
||||||
|
open('../content/discours.io/'+slug+'.md', 'w').write(content)
|
||||||
|
# print(slug)
|
||||||
|
# open('../content/discours.io/'+slug+'.html', 'w').write(content_dict[article['old_id']]['body'])
|
||||||
open('../src/data/articles.json', 'w').write(json.dumps(dict(export_clean),
|
open('../src/data/articles.json', 'w').write(json.dumps(dict(export_clean),
|
||||||
cls=DateTimeEncoder,
|
cls=DateTimeEncoder,
|
||||||
indent=4,
|
indent=4,
|
||||||
sort_keys=True,
|
sort_keys=True,
|
||||||
ensure_ascii=False))
|
ensure_ascii=False))
|
||||||
print(str(counter) + '/' + str(len(content_data)) +
|
print(str(len(export_clean.items())) + ' exported')
|
||||||
' content items were migrated')
|
|
||||||
print(str(len(export_list)) + ' shouts were exported')
|
|
||||||
print(str(discours_author) + ' from them by @discours.io')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -176,6 +187,9 @@ if __name__ == '__main__':
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
shouts()
|
shouts()
|
||||||
|
elif sys.argv[1] == "export_shouts":
|
||||||
|
limit = int(sys.argv[2]) if len(sys.argv) > 2 else None
|
||||||
|
export_shouts(limit)
|
||||||
elif sys.argv[1] == "all":
|
elif sys.argv[1] == "all":
|
||||||
users()
|
users()
|
||||||
topics()
|
topics()
|
||||||
|
|
|
@ -463,7 +463,7 @@ class HTML2Text(HTMLParser.HTMLParser):
|
||||||
if start:
|
if start:
|
||||||
if has_key(attrs, 'href') and not (self.skip_internal_links and attrs['href'].startswith('#')):
|
if has_key(attrs, 'href') and not (self.skip_internal_links and attrs['href'].startswith('#')):
|
||||||
self.astack.append(attrs)
|
self.astack.append(attrs)
|
||||||
self.maybe_automatic_link = attrs['href']
|
self.maybe_automatic_link = attrs['href'][:2000]
|
||||||
else:
|
else:
|
||||||
self.astack.append(None)
|
self.astack.append(None)
|
||||||
else:
|
else:
|
||||||
|
@ -903,4 +903,4 @@ def main():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -15,7 +15,7 @@ users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
|
||||||
topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
|
topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
|
||||||
users_dict['0'] = {
|
users_dict['0'] = {
|
||||||
'id': 9999999,
|
'id': 9999999,
|
||||||
'slug': 'discours.io',
|
'slug': 'discours',
|
||||||
'name': 'Дискурс',
|
'name': 'Дискурс',
|
||||||
'userpic': 'https://discours.io/images/logo-mini.svg',
|
'userpic': 'https://discours.io/images/logo-mini.svg',
|
||||||
'createdAt': '2016-03-05 22:22:00.350000'
|
'createdAt': '2016-03-05 22:22:00.350000'
|
||||||
|
@ -109,7 +109,7 @@ def migrate(entry):
|
||||||
else:
|
else:
|
||||||
body_html = str(BeautifulSoup(
|
body_html = str(BeautifulSoup(
|
||||||
body_orig, features="html.parser"))
|
body_orig, features="html.parser"))
|
||||||
r['body'] = html2text(body_html).replace('****', '**')
|
r['body'] = body_html # html2text(body_html).replace('****', '**')
|
||||||
r['old_id'] = entry.get('_id')
|
r['old_id'] = entry.get('_id')
|
||||||
else:
|
else:
|
||||||
print(r['slug'] + ': literature has no media')
|
print(r['slug'] + ': literature has no media')
|
||||||
|
@ -131,7 +131,7 @@ def migrate(entry):
|
||||||
if r.get('body') is None:
|
if r.get('body') is None:
|
||||||
body_orig = entry.get('body', '')
|
body_orig = entry.get('body', '')
|
||||||
body_html = str(BeautifulSoup(body_orig, features="html.parser"))
|
body_html = str(BeautifulSoup(body_orig, features="html.parser"))
|
||||||
r['body'] = html2text(body_html).replace('****', '**')
|
r['body'] = body_html # html2text(body_html).replace('****', '**')
|
||||||
r['old_id'] = entry.get('_id')
|
r['old_id'] = entry.get('_id')
|
||||||
body = r.get('body')
|
body = r.get('body')
|
||||||
user = None
|
user = None
|
||||||
|
@ -167,7 +167,7 @@ def migrate(entry):
|
||||||
userpic = user.userpic
|
userpic = user.userpic
|
||||||
else:
|
else:
|
||||||
# no application, no author!
|
# no application, no author!
|
||||||
slug = 'discours.io'
|
slug = 'discours'
|
||||||
name = 'Дискурс'
|
name = 'Дискурс'
|
||||||
userpic = 'https://discours.io/images/logo-mini.svg'
|
userpic = 'https://discours.io/images/logo-mini.svg'
|
||||||
with local_session() as session:
|
with local_session() as session:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user