1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000 |
- <?php
- declare(strict_types=1);
- /**
- * FullTextSearch - Full text search framework for Nextcloud
- *
- * This file is licensed under the Affero General Public License version 3 or
- * later. See the COPYING file.
- *
- * @author Maxence Lange <maxence@artificial-owl.com>
- * @copyright 2018
- * @license GNU AGPL version 3 or any later version
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as
- * published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- */
- namespace OC\FullTextSearch\Model;
- use JsonSerializable;
- use OCP\FullTextSearch\Model\IDocumentAccess;
- use OCP\FullTextSearch\Model\IIndex;
- use OCP\FullTextSearch\Model\IIndexDocument;
- /**
- * Class IndexDocument
- *
- * This is one of the main class of the FullTextSearch, used as a data transfer
- * object. An IndexDocument is created to manage documents around FullTextSearch,
- * during an index and during a search.
- * The uniqueness of an IndexDocument is made by the Id of the Content Provider
- * and the Id of the original document within the Content Provider.
- *
- * We will call original document the source from which the IndexDocument is
- * generated. As an example, an original document can be a file, a mail, ...
- *
- * @since 15.0.0
- *
- * @package OC\FullTextSearch\Model
- */
- class IndexDocument implements IIndexDocument, JsonSerializable {
- /** @var string */
- protected $id = '';
- /** @var string */
- protected $providerId = '';
- /** @var DocumentAccess */
- protected $access;
- /** @var IIndex */
- protected $index;
- /** @var int */
- protected $modifiedTime = 0;
- /** @var string */
- protected $source = '';
- /** @var array */
- protected $tags = [];
- /** @var array */
- protected $metaTags = [];
- /** @var array */
- protected $subTags = [];
- /** @var string */
- protected $title = '';
- /** @var string */
- protected $content = '';
- /** @var string */
- protected $hash = '';
- /** @var array */
- protected $parts = [];
- /** @var string */
- protected $link = '';
- /** @var array */
- protected $more = [];
- /** @var array */
- protected $excerpts = [];
- /** @var string */
- protected $score = '';
- /** @var array */
- protected $info = [];
- /** @var int */
- protected $contentEncoded = 0;
- /**
- * IIndexDocument constructor.
- *
- * On creation, we assure the uniqueness of the object using the providerId
- * and the Id of the original document.
- *
- * @since 15.0.0
- *
- * @param string $providerId
- * @param string $documentId
- */
- public function __construct(string $providerId, string $documentId) {
- $this->providerId = $providerId;
- $this->id = $documentId;
- }
- /**
- * Returns the Id of the original document.
- *
- * @since 15.0.0
- *
- * @return string
- */
- final public function getId(): string {
- return $this->id;
- }
- /**
- * Returns the Id of the provider.
- *
- * @since 15.0.0
- *
- * @return string
- */
- final public function getProviderId(): string {
- return $this->providerId;
- }
- /**
- * Set the Index related to the IIndexDocument.
- *
- * @see IIndex
- *
- * @since 15.0.0
- *
- * @param IIndex $index
- *
- * @return IIndexDocument
- */
- final public function setIndex(IIndex $index): IIndexDocument {
- $this->index = $index;
- return $this;
- }
- /**
- * Get the Index.
- *
- * @since 15.0.0
- *
- * @return IIndex
- */
- final public function getIndex(): IIndex {
- return $this->index;
- }
- /**
- * return if Index is defined.
- *
- * @since 16.0.0
- *
- * @return bool
- */
- final public function hasIndex(): bool {
- return ($this->index !== null);
- }
- /**
- * Set the modified time of the original document.
- *
- * @since 15.0.0
- *
- * @param int $modifiedTime
- *
- * @return IIndexDocument
- */
- final public function setModifiedTime(int $modifiedTime): IIndexDocument {
- $this->modifiedTime = $modifiedTime;
- return $this;
- }
- /**
- * Get the modified time of the original document.
- *
- * @since 15.0.0
- *
- * @return int
- */
- final public function getModifiedTime(): int {
- return $this->modifiedTime;
- }
- /**
- * Check if the original document of the IIndexDocument is older than $time.
- *
- * @since 15.0.0
- *
- * @param int $time
- *
- * @return bool
- */
- final public function isOlderThan(int $time): bool {
- return ($this->modifiedTime < $time);
- }
- /**
- * Set the read rights of the original document using a IDocumentAccess.
- *
- * @see IDocumentAccess
- *
- * @since 15.0.0
- *
- * @param IDocumentAccess $access
- *
- * @return $this
- */
- final public function setAccess(IDocumentAccess $access): IIndexDocument {
- $this->access = $access;
- return $this;
- }
- /**
- * Get the IDocumentAccess related to the original document.
- *
- * @since 15.0.0
- *
- * @return IDocumentAccess
- */
- final public function getAccess(): IDocumentAccess {
- return $this->access;
- }
- /**
- * Add a tag to the list.
- *
- * @since 15.0.0
- *
- * @param string $tag
- *
- * @return IIndexDocument
- */
- final public function addTag(string $tag): IIndexDocument {
- $this->tags[] = $tag;
- return $this;
- }
- /**
- * Set the list of tags assigned to the original document.
- *
- * @since 15.0.0
- *
- * @param array $tags
- *
- * @return IIndexDocument
- */
- final public function setTags(array $tags): IIndexDocument {
- $this->tags = $tags;
- return $this;
- }
- /**
- * Get the list of tags assigned to the original document.
- *
- * @since 15.0.0
- *
- * @return array
- */
- final public function getTags(): array {
- return $this->tags;
- }
- /**
- * Add a meta tag to the list.
- *
- * @since 15.0.0
- *
- * @param string $tag
- *
- * @return IIndexDocument
- */
- final public function addMetaTag(string $tag): IIndexDocument {
- $this->metaTags[] = $tag;
- return $this;
- }
- /**
- * Set the list of meta tags assigned to the original document.
- *
- * @since 15.0.0
- *
- * @param array $tags
- *
- * @return IIndexDocument
- */
- final public function setMetaTags(array $tags): IIndexDocument {
- $this->metaTags = $tags;
- return $this;
- }
- /**
- * Get the list of meta tags assigned to the original document.
- *
- * @since 15.0.0
- *
- * @return array
- */
- final public function getMetaTags(): array {
- return $this->metaTags;
- }
- /**
- * Add a sub tag to the list.
- *
- * @since 15.0.0
- *
- * @param string $sub
- * @param string $tag
- *
- * @return IIndexDocument
- */
- final public function addSubTag(string $sub, string $tag): IIndexDocument {
- if (!array_key_exists($sub, $this->subTags)) {
- $this->subTags[$sub] = [];
- }
- $this->subTags[$sub][] = $tag;
- return $this;
- }
- /**
- * Set the list of sub tags assigned to the original document.
- *
- * @since 15.0.0
- *
- * @param array $tags
- *
- * @return IIndexDocument
- */
- final public function setSubTags(array $tags): IIndexDocument {
- $this->subTags = $tags;
- return $this;
- }
- /**
- * Get the list of sub tags assigned to the original document.
- * If $formatted is true, the result will be formatted in a one
- * dimensional array.
- *
- * @since 15.0.0
- *
- * @param bool $formatted
- *
- * @return array
- */
- final public function getSubTags(bool $formatted = false): array {
- if ($formatted === false) {
- return $this->subTags;
- }
- $subTags = [];
- $ak = array_keys($this->subTags);
- foreach ($ak as $source) {
- $tags = $this->subTags[$source];
- foreach ($tags as $tag) {
- $subTags[] = $source . '_' . $tag;
- }
- }
- return $subTags;
- }
- /**
- * Set the source of the original document.
- *
- * @since 15.0.0
- *
- * @param string $source
- *
- * @return IIndexDocument
- */
- final public function setSource(string $source): IIndexDocument {
- $this->source = $source;
- return $this;
- }
- /**
- * Get the source of the original document.
- *
- * @since 15.0.0
- *
- * @return string
- */
- final public function getSource(): string {
- return $this->source;
- }
- /**
- * Set the title of the original document.
- *
- * @since 15.0.0
- *
- * @param string $title
- *
- * @return IIndexDocument
- */
- final public function setTitle(string $title): IIndexDocument {
- $this->title = $title;
- return $this;
- }
- /**
- * Get the title of the original document.
- *
- * @since 15.0.0
- *
- * @return string
- */
- final public function getTitle(): string {
- return $this->title;
- }
- /**
- * Set the content of the document.
- * $encoded can be NOT_ENCODED or ENCODED_BASE64 if the content is raw or
- * encoded in base64.
- *
- * @since 15.0.0
- *
- * @param string $content
- * @param int $encoded
- *
- * @return IIndexDocument
- */
- final public function setContent(string $content, int $encoded = 0): IIndexDocument {
- $this->content = $content;
- $this->contentEncoded = $encoded;
- return $this;
- }
- /**
- * Get the content of the original document.
- *
- * @since 15.0.0
- *
- * @return string
- */
- final public function getContent(): string {
- return $this->content;
- }
- /**
- * Returns the type of the encoding on the content.
- *
- * @since 15.0.0
- *
- * @return int
- */
- final public function isContentEncoded(): int {
- return $this->contentEncoded;
- }
- /**
- * Return the size of the content.
- *
- * @since 15.0.0
- *
- * @return int
- */
- final public function getContentSize(): int {
- return strlen($this->getContent());
- }
- /**
- * Generate an hash, based on the content of the original document.
- *
- * @since 15.0.0
- *
- * @return IIndexDocument
- */
- final public function initHash(): IIndexDocument {
- if ($this->getContent() === '' || is_null($this->getContent())) {
- return $this;
- }
- $this->hash = hash("md5", $this->getContent());
- return $this;
- }
- /**
- * Set the hash of the original document.
- *
- * @since 15.0.0
- *
- * @param string $hash
- *
- * @return IIndexDocument
- */
- final public function setHash(string $hash): IIndexDocument {
- $this->hash = $hash;
- return $this;
- }
- /**
- * Get the hash of the original document.
- *
- * @since 15.0.0
- *
- * @return string
- */
- final public function getHash(): string {
- return $this->hash;
- }
- /**
- * Add a part, identified by a string, and its content.
- *
- * It is strongly advised to use alphanumerical chars with no space in the
- * $part string.
- *
- * @since 15.0.0
- *
- * @param string $part
- * @param string $content
- *
- * @return IIndexDocument
- */
- final public function addPart(string $part, string $content): IIndexDocument {
- $this->parts[$part] = $content;
- return $this;
- }
- /**
- * Set all parts and their content.
- *
- * @since 15.0.0
- *
- * @param array $parts
- *
- * @return IIndexDocument
- */
- final public function setParts(array $parts): IIndexDocument {
- $this->parts = $parts;
- return $this;
- }
- /**
- * Get all parts of the IIndexDocument.
- *
- * @since 15.0.0
- *
- * @return array
- */
- final public function getParts(): array {
- return $this->parts;
- }
- /**
- * Add a link, usable by the frontend.
- *
- * @since 15.0.0
- *
- * @param string $link
- *
- * @return IIndexDocument
- */
- final public function setLink(string $link): IIndexDocument {
- $this->link = $link;
- return $this;
- }
- /**
- * Get the link.
- *
- * @since 15.0.0
- *
- * @return string
- */
- final public function getLink(): string {
- return $this->link;
- }
- /**
- * Set more information that couldn't be set using other method.
- *
- * @since 15.0.0
- *
- * @param array $more
- *
- * @return IIndexDocument
- */
- final public function setMore(array $more): IIndexDocument {
- $this->more = $more;
- return $this;
- }
- /**
- * Get more information.
- *
- * @since 15.0.0
- *
- * @return array
- */
- final public function getMore(): array {
- return $this->more;
- }
- /**
- * Add some excerpt of the content of the original document, usually based
- * on the search request.
- *
- * @since 16.0.0
- *
- * @param string $source
- * @param string $excerpt
- *
- * @return IIndexDocument
- */
- final public function addExcerpt(string $source, string $excerpt): IIndexDocument {
- $this->excerpts[] =
- [
- 'source' => $source,
- 'excerpt' => $this->cleanExcerpt($excerpt)
- ];
- return $this;
- }
- /**
- * Set all excerpts of the content of the original document.
- *
- * @since 16.0.0
- *
- * @param array $excerpts
- *
- * @return IIndexDocument
- */
- final public function setExcerpts(array $excerpts): IIndexDocument {
- $new = [];
- foreach ($excerpts as $entry) {
- $new[] = [
- 'source' => $entry['source'],
- 'excerpt' => $this->cleanExcerpt($entry['excerpt'])
- ];
- }
- $this->excerpts = $new;
- return $this;
- }
- /**
- * Get all excerpts of the content of the original document.
- *
- * @since 15.0.0
- *
- * @return array
- */
- final public function getExcerpts(): array {
- return $this->excerpts;
- }
- /**
- * Clean excerpt.
- *
- * @since 16.0.0
- *
- * @param string $excerpt
- * @return string
- */
- final private function cleanExcerpt(string $excerpt): string {
- $excerpt = str_replace("\\n", ' ', $excerpt);
- $excerpt = str_replace("\\r", ' ', $excerpt);
- $excerpt = str_replace("\\t", ' ', $excerpt);
- $excerpt = str_replace("\n", ' ', $excerpt);
- $excerpt = str_replace("\r", ' ', $excerpt);
- $excerpt = str_replace("\t", ' ', $excerpt);
- return $excerpt;
- }
- /**
- * Set the score to the result assigned to this document during a search
- * request.
- *
- * @since 15.0.0
- *
- * @param string $score
- *
- * @return IIndexDocument
- */
- final public function setScore(string $score): IIndexDocument {
- $this->score = $score;
- return $this;
- }
- /**
- * Get the score.
- *
- * @since 15.0.0
- *
- * @return string
- */
- final public function getScore(): string {
- return $this->score;
- }
- /**
- * Set some information about the original document that will be available
- * to the front-end when displaying search result. (as string)
- * Because this information will not be indexed, this method can also be
- * used to manage some data while filling the IIndexDocument before its
- * indexing.
- *
- * @since 15.0.0
- *
- * @param string $info
- * @param string $value
- *
- * @return IIndexDocument
- */
- final public function setInfo(string $info, string $value): IIndexDocument {
- $this->info[$info] = $value;
- return $this;
- }
- /**
- * Get an information about a document. (string)
- *
- * @since 15.0.0
- *
- * @param string $info
- * @param string $default
- *
- * @return string
- */
- final public function getInfo(string $info, string $default = ''): string {
- if (!key_exists($info, $this->info)) {
- return $default;
- }
- return $this->info[$info];
- }
- /**
- * Set some information about the original document that will be available
- * to the front-end when displaying search result. (as array)
- * Because this information will not be indexed, this method can also be
- * used to manage some data while filling the IIndexDocument before its
- * indexing.
- *
- * @since 15.0.0
- *
- * @param string $info
- * @param array $value
- *
- * @return IIndexDocument
- */
- final public function setInfoArray(string $info, array $value): IIndexDocument {
- $this->info[$info] = $value;
- return $this;
- }
- /**
- * Get an information about a document. (array)
- *
- * @since 15.0.0
- *
- * @param string $info
- * @param array $default
- *
- * @return array
- */
- final public function getInfoArray(string $info, array $default = []): array {
- if (!key_exists($info, $this->info)) {
- return $default;
- }
- return $this->info[$info];
- }
- /**
- * Set some information about the original document that will be available
- * to the front-end when displaying search result. (as int)
- * Because this information will not be indexed, this method can also be
- * used to manage some data while filling the IIndexDocument before its
- * indexing.
- *
- * @since 15.0.0
- *
- * @param string $info
- * @param int $value
- *
- * @return IIndexDocument
- */
- final public function setInfoInt(string $info, int $value): IIndexDocument {
- $this->info[$info] = $value;
- return $this;
- }
- /**
- * Get an information about a document. (int)
- *
- * @since 15.0.0
- *
- * @param string $info
- * @param int $default
- *
- * @return int
- */
- final public function getInfoInt(string $info, int $default = 0): int {
- if (!key_exists($info, $this->info)) {
- return $default;
- }
- return $this->info[$info];
- }
- /**
- * Set some information about the original document that will be available
- * to the front-end when displaying search result. (as bool)
- * Because this information will not be indexed, this method can also be
- * used to manage some data while filling the IIndexDocument before its
- * indexing.
- *
- * @since 15.0.0
- *
- * @param string $info
- * @param bool $value
- *
- * @return IIndexDocument
- */
- final public function setInfoBool(string $info, bool $value): IIndexDocument {
- $this->info[$info] = $value;
- return $this;
- }
- /**
- * Get an information about a document. (bool)
- *
- * @since 15.0.0
- *
- * @param string $info
- * @param bool $default
- *
- * @return bool
- */
- final public function getInfoBool(string $info, bool $default = false): bool {
- if (!key_exists($info, $this->info)) {
- return $default;
- }
- return $this->info[$info];
- }
- /**
- * Get all info.
- *
- * @since 15.0.0
- *
- * @return array
- */
- final public function getInfoAll(): array {
- $info = [];
- foreach ($this->info as $k => $v) {
- if (substr($k, 0, 1) === '_') {
- continue;
- }
- $info[$k] = $v;
- }
- return $info;
- }
- /**
- * @since 15.0.0
- *
- * On some version of PHP, it is better to force destruct the object.
- * And during the index, the number of generated IIndexDocument can be
- * _huge_.
- */
- public function __destruct() {
- unset($this->id);
- unset($this->providerId);
- unset($this->access);
- unset($this->modifiedTime);
- unset($this->title);
- unset($this->content);
- unset($this->hash);
- unset($this->link);
- unset($this->source);
- unset($this->tags);
- unset($this->metaTags);
- unset($this->subTags);
- unset($this->more);
- unset($this->excerpts);
- unset($this->score);
- unset($this->info);
- unset($this->contentEncoded);
- }
- /**
- * @since 15.0.0
- *
- * @return array
- */
- public function jsonSerialize() {
- return [
- 'id' => $this->getId(),
- 'providerId' => $this->getProviderId(),
- 'access' => $this->access,
- 'modifiedTime' => $this->getModifiedTime(),
- 'title' => $this->getTitle(),
- 'link' => $this->getLink(),
- 'index' => $this->index,
- 'source' => $this->getSource(),
- 'info' => $this->getInfoAll(),
- 'hash' => $this->getHash(),
- 'contentSize' => $this->getContentSize(),
- 'tags' => $this->getTags(),
- 'metatags' => $this->getMetaTags(),
- 'subtags' => $this->getSubTags(),
- 'more' => $this->getMore(),
- 'excerpts' => $this->getExcerpts(),
- 'score' => $this->getScore()
- ];
- }
- }
|