Module:Tree

From Wikidata
Jump to navigation Jump to search

Documentation for this module may be created at Module:Tree/doc

local p = {}

--[[
-- External dependendies
--]]

local log = mw.log

local split = mw.text.split
local tag = mw.text.tag

local function getFrame(frame)
	return frame or mw.getCurrentFrame()
end

local _IntLang
local function getLang(frame)
	local lang = frame.args.lang
	if not lang or lang == '' then
		if not _IntLang then
			_IntLang = frame:preprocess('{{Int:Lang}}')
		end
		lang = _IntLang
	end
	return lang
end

local getEntityObject = mw.wikibase.getEntityObject -- getEntityObject(itemId) is a costly function call

local wikidata = require('Module:Wikidata')
local getClaims = wikidata.getClaims -- getClaims{item = itemId, options...} is a costly function call
local formatEntity = wikidata.formatEntity -- formatEntity(entity, {options...})
local _formatStatements = wikidata._formatStatements -- _formatStatements{entity = entity, options...}
local getmainid = wikidata.getmainid
local getDate = wikidata.getDate
local comparedate = wikidata.comparedate

local compactdaterange = require('Module:Daterange').compactdaterange

local inparentheses = require('Module:Linguistic').inparentheses

--[[
-- Internal implementation
--]]

-- Function that when called several times will generate a sequence of strings.
-- If g = generator('0', '1', '2'), then g() successively returns '0', '1', '2', '10', '11', '12', '20'...
-- See http://lua-users.org/wiki/LuaCoroutinesVersusPythonGenerators for lua doc.
local function Generator(...)
	local SYMSET = type(...) == 'array' and ... or { ... }
	local BASE = #SYMSET; assert(BASE > 1, 'not enough base symbols')
	local INT, COUNT = math.floor, 0
	return function()
		local symbol, value = '', COUNT
		COUNT = COUNT + 1
		repeat
			local quotient = INT(value / BASE)
			symbol = SYMSET[value - quotient * BASE + 1] .. symbol
			value = quotient
		until value <= 0
		return symbol
	end
end

local function outputTree(query, rootItemId, maxdepth, lang, formatting)
	-- Topological sort and meta data of the DAG (https://en.wikipedia.org/wiki/Topological_sorting)
	-- while there are unmarked nodes do
	--    select an unmarked node n
	--    visit(n)
	-- function visit(node n)
	--    if n has a temporary opened mark then stop (not a DAG)
	--    if n is not marked (i.e. has not been visited yet) then
	--        mark n temporarily as opened
	--        for each node m with an edge from n to m do
	--            visit(m)
	--        unmark n permanently
	--        add n to head of L
	local data, SYMBOL, NPARENTS, LOOP, DEEPEST = {}, 0, 1, 2, 3
	local function setdata(itemId, key, val)
		if data[itemId] == nil then data[itemId] = {} end
		data[itemId][key] = val
	end
	local function getdata(itemId, key, defval)
		return data[itemId] and data[itemId][key] or defval
	end
	maxdepth = maxdepth or 10
	local children = {}

	-- This function visits and builds the tree, DAG or graph, annotated by:
	-- * deepest (depth of their deepest node, stopping at looping nodes or maxdepth)
	-- * nparent (number of nodes pointing to it, for generating link symbols)
	local visiting = {}
	local function visit(itemId, depth) -- Recursive function
		if visiting[itemId] then -- don't query children again, just adjust the deepest level
			setdata(itemId, LOOP, true)
			local deepest = getdata(itemId, DEEPEST, 0)
			if depth > deepest then
				deepest = depth
				setdata(itemId, DEEPEST, deepest)
			end
			return deepest
		end
		-- This node may have already been visited from another parent (possibly as a LOOP or incomplete)
		log('visiting '..itemId)
		visiting[itemId] = true
		if children[itemId] then -- already visited and complete, just adjust the deepest level
			local deepest = getdata(itemId, DEEPEST, 0)
			if depth > deepest then
				deepest = depth
				setdata(itemId, DEEPEST, deepest)
			end
		elseif depth <= maxdepth then
			-- first visit, query children, increment their number of parents
			local childIds = {}
			local deepest = getdata(itemId, DEEPEST, depth)
			query.item = itemId
			local claims = getClaims(query) -- getClaims(query) is a costly call
			if claims then
				for _, claim in pairs(claims) do
					local childId = 'Q' .. claim.mainsnak.datavalue.value['numeric-id']
					table.insert(childIds, childId)
					setdata(childId, NPARENTS, getdata(childId, NPARENTS, 0) + 1)
				end
			end
			for _, childId in ipairs(childIds) do
				local deep = visit(childId, depth + 1) -- visit() is recursive
				if deep > deepest then
					deepest = deep
				end
			end
			setdata(itemId, DEEPEST, deepest)
			children[itemId] = childIds
		end -- else leave incomplete (childrens[itemId] remains nil)
		visiting[itemId] = nil
		return depth + 1
	end
	visit(rootItemId, 1) -- visit() is recursive

	if type(formatting) ~= 'function' then
		if type(formatting) == 'table' then
			formatting = function(entity)
				return tag('span', { style = formatting, }, formatEntity(entity, { lang = lang, }))
			end
		else
			formatting = function(entity)
				return formatEntity(entity, { lang = lang, })
			end
		end
	end
	local content = formatting(getEntityObject(rootItemId)) -- getEntityObject(itemId) is a costly call

	local genSymbol = Generator('@', '#', '%', '+', '∗', '¤', '§', '⁋', '†', '‡', '⋔', '℧', '∇', '⁂'
			--[[these symbols have font/size problem
			, '※', '⌘', '⦾', '⦿', '★', '☆', '✴'
			]]
		)

	local function formatSymbol(symbol) -- link inside tree
		return tag('bdi', { class = 'Unicode', }, tag('small', {}, '(' .. (symbol or 'missing symbol') .. ')'))
	end
	local function genAnchor(itemId)
		return tag('i', { id = rootItemId .. itemId, }, '')
	end
	local function anchorLink(itemId, content)
		return '[[#' .. rootItemId .. itemId .. '|' .. content .. ']]'
	end
	local function fmtTreeLinks(content)
		return tag('sup', {}, content)
	end

	local alreadyOuted = {}
	local arrow = mw.language.new(lang):getArrow()
	local function renderTree(itemId) -- recursive function
		local content = formatting(getEntityObject(itemId))
		-- log('render ' .. itemId)
		if getdata(itemId, NPARENTS, 0) > 1 and getdata(itemId, SYMBOL) == nil then
			setdata(itemId, SYMBOL, genSymbol())
		end

		if getdata(itemId, LOOP) then
			if getdata(itemId, LOOP) == true then
				setdata(itemId, LOOP, 1) -- treated
				content = fmtTreeLinks(' ∞') .. ' ' .. content .. genAnchor(itemId)
			else
				return content .. fmtTreeLinks('∞ ↑ ' .. anchorLink(itemId, formatSymbol(getdata(itemId, SYMBOL))))
			end
		end
 		local childIds = children[itemId]
		if not childIds then
			return content .. ' ' .. fmtTreeLinks('…') -- incomplete
		end
		local nChildIds = #childIds
		if nChildIds == 0 then
			-- has no children? display as leaf
			return ' ' .. content .. ' ⊸ ' -- U+22B8: multijection symbol from maths, an horizontal stroke ended by a ring to the right
		end
		setdata(itemId, NPARENTS, getdata(itemId, NPARENTS, 0) - 1)
		if not alreadyOuted[itemId] then
			-- sort children topologycally
	    	table.sort(childIds, function(a, b) return getdata(a, DEEPEST, 0) < getdata(b, DEEPEST, 0) end)
			local childItems = {}
 		    for i, childId in ipairs(childIds) do
		        table.insert(childItems,
					tag('li',  i == nChildIds and { class = 'lastline', } or {}, renderTree(childId))) -- renderTree() is recursive
		    end
 		    alreadyOuted[itemId] =
				(getdata(itemId, NPARENTS, 0) > 0 and
					fmtTreeLinks(
						genAnchor(itemId) ..
						arrow --[[linked from up]] ..
						formatSymbol(getdata(itemId, SYMBOL))
					) .. ' '
				or ''
				) .. ' ' .. content .. tag('ul', {}, table.concat(childItems))
		end
 		if getdata(itemId, NPARENTS, 0) > 0 then
			return ' ' .. content .. ' ' ..
				fmtTreeLinks(
					anchorLink(itemId,
						formatSymbol(getdata(itemId, SYMBOL)) ..
						arrow --[[linked to down]]
					)
				)
		end
		return alreadyOuted[itemId]
	end
	return tag('li', {}, renderTree(rootItemId)) -- renderTree() is recursive
end

local treeviewStylesDone
local function treeviewStyles(frame)
	frame = getFrame(frame)
	if treeviewStylesDone or not frame then
		return ''
	end
	treeviewStylesDone = true
	return frame:extensionTag('templatestyles', '', { src = 'Tree/styles.css', })
end

--[[
-- Exported functions
--]]

function p._outputForest(query, itemIds, maxdepth, styles, lang, formatting)
	if tonumber(query.property) then --legacy format
		query.property = 'P' .. tostring(query.property)
	end
	query.excludespecial = true
	local trees = {}
	for _, itemId in ipairs(itemIds) do
		table.insert(trees, outputTree(query, itemId, maxdepth, lang, formatting))
	end
	return styles .. tag('div', { class = 'treeview', }, tag('ul', {}, table.concat(trees)))
end

function p.familytree(frame)
    local itemIds = split(frame.args[1], ' ')
    if #itemIds == 0 then
        return 'No items passed as parameter'
    end
    local dates = {} -- cache for keeping the comparable date values below during sort
	local query = {
		property = 'P40', -- Property:Child
		sorttype = function(entity1, entity2) -- sort by birth date
			-- this is *memory* intensive there should be a way to desactive sorting above some number of nodes
			if not dates[item1] then
				dates[entity1] = getDate(getmainid(entity1)) or ''
			end
			if not dates[itemId2] then
				dates[entity2] = getDate(getmainid(entity2)) or ''
			end
			return comparedate(dates[entity1], dates[entity2])
		end,
	}
	local function formatting(entity)
		local val = formatEntity(entity, { lang = lang, })
		local birthyear
		if entity.claims and entity.claims.P569 and entity.claims.P569[1].mainsnak.snaktype == 'value' then
			birthyear = entity.claims.P569[1].mainsnak.datavalue.value
		end
		local deathyear
		if entity.claims and entity.claims.P570 and entity.claims.P570[1].mainsnak.snaktype == 'value' then
			deathyear = entity.claims.P570[1].mainsnak.datavalue.value
		end
		if birthyear or deathyear then
			val = val .. ' ' inparentheses(
				compactdaterange({ startpoint = birthyear, endpoint = deathyear, }, lang)
				, lang)
		end
		local desc = _formatStatements{ entity = entity, lang = lang, property = { 'P39', 'P106' }, }
		if desc then
			val = val .. ' ' .. tag('small', {}, desc)
		end
		-- local gender = _formatStatements{ entity = entity, lang = lang, property = 'P21', numval = 1, displayformat = 'raw', }
		return tag('span', {
				--[==[
				style = 'background:' .. (
					--[[male]]    gender == 'Q6581097' and '#069' or
					--[[female]]  gender == 'Q6581072' and '#900' or
					--[[neutral]] '#888'
					) .. 'color:#FFF;border:thin solid #AAA',
				--]==]
			}, val)
	end
	return p._outputForest(query, itemIds, tonumber(frame.args.maxdepth) or 3, treeviewStyles(frame), getLang(frame), formatting--[=[]=])
end

function p.outputTreeFromTemplate(frame)
    local frame = frame:getParent()
	local query = frame.args
    local itemIds = split(query.items, ' ')
    if #itemIds == 0 then
        return 'No items passed as parameter'
    end
	return p._outputForest(query, itemIds, tonumber(query.recursion) or 10, treeviewStyles(frame), getLang(frame))
end

return p