- Run
./scheduleUpdateSuggester 20180312
on tool forge (replace 20180312
with the date of the latest JSON dump)
- Wait
- Check the logs at
updateSuggester.err
for problems during the creation
- Run
sha1sum analyzed-out
(or whatever hashing algorithm you prefer)
- Run
gzip analyzed-out
- Rsync analyzed-out.gz to your local machine, commit to the wbs_propertypairs repo.
- Download it to terbium (or some other maintenance host) with
https_proxy=http://webproxy.eqiad.wmnet:8080 wget 'https://github.com/wmde/wbs_propertypairs/raw/master/20180312/wbs_propertypairs.csv.gz'
(again, replace 20180312
with the date of the JSON dump you produced).
- Unpack it:
gzip -d wbs_propertypairs.csv.gz
- Compare the checksum to the one obtained on tool forge
- Update the actual table:
mwscript extensions/PropertySuggester/maintenance/UpdateTable.php --wiki wikidatawiki --file wbs_propertypairs.csv
- Run
T132839-Workarounds.sh
(on terbium)
- Log your changes
Last active
January 21, 2021 19:54
-
-
Save mariushoch/22f4ead44f75c5133e403f465bc279a5 to your computer and use it in GitHub Desktop.
PropertySuggester update tools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Schedule the PropertySuggester update job via jsub.
#
# Usage: scheduleUpdateSuggester <dump-date>
#   <dump-date>  date of the JSON dump to use, e.g. 20160905
#
# Deletes the updateSuggester.err / updateSuggester.out log files in the
# current directory, then submits $HOME/updateSuggester.sh to jsub with the
# dump date as its only argument. Exits 1 if no dump date is given.

if [[ -z "${1:-}" ]]; then
  # Usage errors are diagnostics, so they go to stderr.
  echo 'First argument needs to be the json dump use, like 20160905' >&2
  exit 1
fi

# RM old logs
rm -f updateSuggester.err updateSuggester.out

# Quote both the script path and the argument so neither is word-split.
jsub -mem 3500m -N updateSuggester "$HOME/updateSuggester.sh" "$1"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Removes selected rows from the wbs_propertypairs table on wikidatawiki.
# Every DELETE statement is capped at 5000 rows.

# Run a single SQL statement ($1) against wikidatawiki with write access.
run_delete() {
  sql wikidatawiki --write -- --execute "$1"
}

# External-id properties in item context: 40 batches, pausing 3 seconds
# after each one and printing a dot per batch as a progress indicator.
printf '%s' 'Removing ext ids in item context '
for (( batch = 0; batch < 40; batch++ )); do
  printf '%s' '.'
  run_delete "DELETE FROM wbs_propertypairs WHERE pid1 IN (SELECT pi_property_id FROM wb_property_info WHERE pi_type = 'external-id') AND context = 'item' LIMIT 5000"
  sleep 3
done

# Individually listed properties in item context: one statement per property.
for property in 17 18 276 301 373 463 495 571 641 1344 1448 1476; do
  printf '\n'
  printf '%s\n' "Removing P${property} item context"
  run_delete "DELETE FROM wbs_propertypairs WHERE pid1 = '${property}' AND context = 'item' LIMIT 5000"
done

# P31 as a qualifier suggestion for the listed properties.
printf '\n'
printf '%s\n' "Removing P31 qualifier suggestions for P569, P570, P571, P576"
run_delete "DELETE FROM wbs_propertypairs WHERE context = 'qualifier' AND pid1 IN(569, 570, 571, 576) AND pid2 = 31 LIMIT 5000"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Convert and analyze a Wikidata JSON dump, writing the result to
# $HOME/analyzed-out.
#
# Usage: updateSuggester.sh <dump-date>
#   <dump-date>  date of the JSON dump to use, e.g. 20160905
#
# The dump is read from /public/dumps if a non-empty file exists there.
# Otherwise a copy in $HOME is used, and downloaded from dumps.wikimedia.org
# first if it is missing or empty. The intermediate CSV and the copy in $HOME
# are removed at the end. Exits 1 if no dump date is given or the download
# fails; any other failing command aborts the script (set -e).

if [[ -z "${1:-}" ]]; then
  # Usage errors are diagnostics, so they go to stderr.
  echo 'First argument needs to be the json dump use, like 20160905' >&2
  exit 1
fi

set -ex

local_dump="$HOME/PropertySuggester-wikidata-$1-all.json.gz"

DUMP="/public/dumps/public/wikidatawiki/entities/$1/wikidata-$1-all.json.gz"
if [[ ! -s "$DUMP" ]]; then
  DUMP=$local_dump
fi

if [[ ! -s "$DUMP" ]]; then
  echo "$DUMP not found, manually downloading."
  echo
  # --fail makes curl return non-zero on an HTTP error instead of saving the
  # error page. Downloading to a .part file and renaming only on success keeps
  # a truncated or bogus file from passing the -s check on a later run.
  if ! curl --fail --output "$DUMP.part" \
    "https://dumps.wikimedia.org/wikidatawiki/entities/$1/wikidata-$1-all.json.gz"; then
    rm -f "$DUMP.part"
    echo "Downloading the dump to $DUMP failed." >&2
    exit 1
  fi
  mv "$DUMP.part" "$DUMP"
fi

cd "$HOME/wikibase-property-suggester-scripts"

# Activate virtualenv
. bin/activate

export LC_ALL=en_US.UTF-8

# XXX: Could also use /tmp here instead of $HOME to take load off NFS, but then again /tmp might be too small
# XXX: What about /mnt/nfs/labstore1003-scratch?
PYTHONPATH=build/lib/ python3 ./build/lib/scripts/dumpconverter.py "$DUMP" > "$HOME/dumpconvert.csv"
PYTHONPATH=build/lib/ python3 ./build/lib/scripts/analyzer.py "$HOME/dumpconvert.csv" "$HOME/analyzed-out"

rm "$HOME/dumpconvert.csv"
# Only the copy in $HOME is removed; -f keeps this quiet when the dump was
# read from /public/dumps and no such copy exists.
rm -f "$local_dump"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks, I'll also add a note for next month to check! :)